Skip to content

Commit 3e1557b

Browse files
committed
Gather NAMs into a single vector
1 parent f28298b commit 3e1557b

File tree

1 file changed

+25
-46
lines changed

1 file changed

+25
-46
lines changed

src/aln.cpp

+25 -46
Original file line number | Diff line number | Diff line change
@@ -1156,88 +1156,67 @@ void align_or_map_single(
11561156
Details details;
11571157
Timer strobe_timer;
11581158
std::string seq_revcomp = reverse_complement(record.seq);
1159-
std::vector<QueryRandstrobe> query_randstrobes[2];
1160-
std::vector<Nam> nams[2];
1159+
std::vector<Nam> nams;
11611160
for (bool revcomp : {false, true}) {
11621161
const std::string& seq = revcomp ? seq_revcomp : record.seq;
1163-
query_randstrobes[revcomp] = randstrobes_query(seq, index_parameters);
1162+
auto query_randstrobes = randstrobes_query(seq, index_parameters);
11641163

1165-
statistics.n_randstrobes += query_randstrobes[revcomp].size();
1164+
statistics.n_randstrobes += query_randstrobes.size();
11661165
statistics.tot_construct_strobemers += strobe_timer.duration();
11671166

11681167
// Find NAMs
11691168
Timer nam_timer;
1170-
auto [nonrepetitive_fraction, n_hits, nams_found] = find_nams(query_randstrobes[revcomp], index, map_param.use_mcs);
1171-
nams[revcomp] = nams_found;
1169+
auto [nonrepetitive_fraction, n_hits, nams_found] = find_nams(query_randstrobes, index, map_param.use_mcs);
11721170
statistics.tot_find_nams += nam_timer.duration();
11731171
statistics.n_hits += n_hits;
1174-
details.nams += nams[revcomp].size();
1172+
details.nams += nams_found.size();
11751173

1176-
if (map_param.rescue_level > 1 && (nams[revcomp].empty() || nonrepetitive_fraction < 0.7)) {
1174+
if (map_param.rescue_level > 1 && (nams_found.empty() || nonrepetitive_fraction < 0.7)) {
11771175
Timer rescue_timer;
11781176
int n_rescue_hits;
1179-
std::tie(n_rescue_hits, nams[revcomp]) = find_nams_rescue(query_randstrobes[revcomp], index, map_param.rescue_cutoff, map_param.use_mcs);
1180-
statistics.n_rescue_hits += n_rescue_hits;
1181-
details.rescue_nams += nams[revcomp].size();
1177+
std::tie(n_rescue_hits, nams_found) = find_nams_rescue(query_randstrobes, index, map_param.rescue_cutoff, map_param.use_mcs);
1178+
details.rescue_nams += nams_found.size();
11821179
details.nam_rescue = true;
1180+
statistics.n_rescue_hits += n_rescue_hits;
11831181
statistics.tot_time_rescue += rescue_timer.duration();
11841182
}
1185-
}
1186-
1187-
for (auto &n : nams[1]) {
1188-
n.is_rc = true;
1183+
for (auto &nam : nams_found) {
1184+
nam.is_rc = revcomp;
1185+
nams.push_back(nam);
1186+
}
11891187
}
11901188

11911189
Timer nam_sort_timer;
1192-
for (int i = 0; i < 2; ++i) {
1193-
std::sort(nams[i].begin(), nams[i].end(), by_score<Nam>);
1194-
shuffle_top_nams(nams[i], random_engine);
1195-
}
1190+
std::sort(nams.begin(), nams.end(), by_score<Nam>);
1191+
shuffle_top_nams(nams, random_engine);
11961192
statistics.tot_sort_nams += nam_sort_timer.duration();
11971193

11981194
#ifdef TRACE
11991195
std::cerr << "Query: " << record.name << '\n';
1200-
for (bool revcomp : {false, true}) {
1201-
std::cerr << "Found " << nams[revcomp].size() << " NAMs for "
1202-
<< (revcomp ? "reverse-complemented" : "forward") << " query\n";
1203-
for (auto& nam : nams[revcomp]) {
1204-
std::cerr << "- " << nam << '\n';
1205-
}
1196+
std::cerr << "Found " << nams.size() << " NAMs:\n";
1197+
for (auto& nam : nams) {
1198+
std::cerr << "- " << nam << '\n';
12061199
}
12071200
#endif
12081201

1209-
// Forward or reverse complement?
1210-
// TODO this does not allow us to have secondary hits in a different
1211-
// orientation. We should merge the NAMs into one vector again.
1212-
int orientation;
1213-
if (nams[0].empty()) {
1214-
orientation = 1;
1215-
} else if (nams[1].empty()) {
1216-
orientation = 0;
1217-
} else if (nams[0][0].score >= nams[1][0].score) {
1218-
orientation = 0;
1219-
} else {
1220-
orientation = 1;
1221-
}
1222-
12231202
Timer extend_timer;
12241203
size_t n_best = 0;
12251204
switch (map_param.output_format) {
12261205
case OutputFormat::Abundance: {
1227-
if (!nams[orientation].empty()){
1228-
for (auto &t : nams[orientation]) {
1229-
if (t.score == nams[orientation][0].score){
1206+
if (!nams.empty()){
1207+
for (auto &t : nams) {
1208+
if (t.score == nams[0].score){
12301209
++n_best;
12311210
} else{
12321211
break;
12331212
}
12341213
}
12351214

1236-
for (auto &nam: nams[orientation]) {
1215+
for (auto &nam: nams) {
12371216
if (nam.ref_start < 0) {
12381217
continue;
12391218
}
1240-
if (nam.score != nams[orientation][0].score){
1219+
if (nam.score != nams[0].score){
12411220
break;
12421221
}
12431222
abundances[nam.ref_id] += float(record.seq.length()) / float(n_best);
@@ -1246,12 +1225,12 @@ void align_or_map_single(
12461225
}
12471226
break;
12481227
case OutputFormat::PAF:
1249-
output_hits_paf(outstring, nams[orientation], record.name, references,
1228+
output_hits_paf(outstring, nams, record.name, references,
12501229
record.seq.length());
12511230
break;
12521231
case OutputFormat::SAM:
12531232
align_single(
1254-
aligner, sam, nams[orientation], record, index_parameters.syncmer.k,
1233+
aligner, sam, nams, record, index_parameters.syncmer.k,
12551234
references, details, map_param.dropoff_threshold, map_param.max_tries,
12561235
map_param.max_secondary, random_engine
12571236
);

0 commit comments

Comments
 (0)