forked from algbio/SBWT
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.cpp
74 lines (56 loc) · 2.31 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include "sbwt/SBWT.hh"
#include "sbwt/variants.hh"
using namespace sbwt;
typedef plain_matrix_sbwt_t sbwt_t;
struct Counter{
int32_t color;
int32_t count;
};
int main(int argc, char** argv){
string indexfile = argv[1];
throwing_ifstream in(indexfile, ios::binary);
string variant = load_string(in.stream); // read variant type
if(variant != "plain-matrix"){
cerr << "Error: this code only supports the plain matrix variant" << endl;
return 1;
}
cerr << "Loading SBWT from " << indexfile << endl;
sbwt_t sbwt;
sbwt.load(in.stream);
cerr << "SBWT loaded" << endl;
int64_t sbwt_length = sbwt.number_of_subsets();
vector<vector<Counter>> counters(sbwt_length); // K-mer handle -> list of counters
vector<bool> kmer_handles_found(sbwt_length); // Bit vector that marks which k-mer handles have at least 1 counter
// Arguments 2..(argc-1) are sequence files from which we want to compute the k-mer counts
for(int64_t i = 2; i < argc; i++){
int32_t color = i - 2;
string filename = argv[i];
seq_io::Reader<> reader(filename);
while(true){
int64_t length = reader.get_next_read_to_buffer();
if(length == 0) break; // All sequences have been read
const char* seq = reader.read_buf; // The DNA sequence
// Search all k-mers of seq
vector<int64_t> handles = sbwt.streaming_search(seq, length);
for(int64_t handle : handles){
if(handle == -1) continue; // This k-mer does not exist in the index
if(counters[handle].size() == 0 || counters[handle].back().color != color){
// No counter yet for this k-mer and this color
Counter C = {.color = color, .count = 0}; // Create a counter
counters[handle].push_back(C);
kmer_handles_found[handle] = 1;
}
counters[handle].back().count++; // Add to the count of this color in this k-mer
}
}
}
for(int64_t i = 0; i < counters.size(); i++){
if(kmer_handles_found[i]){
cout << i;
for(Counter C : counters[i]){
cout << " (" << C.color << ": " << C.count << ")";
}
cout << "\n";
}
}
}