Skip to content

Commit 35d9db3

Browse files
committed
Refactor: Factor out get_nams function
This removes some code duplicated between align_or_map_single and align_or_map_paired.
1 parent 80fc9a1 commit 35d9db3

File tree

1 file changed

+59
-72
lines changed

1 file changed

+59
-72
lines changed

src/aln.cpp

+59-72
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,61 @@ bool has_shared_substring(const std::string& read_seq, const std::string& ref_se
10191019
return false;
10201020
}
10211021

1022+
1023+
/*
1024+
* Obtain NAMs for a sequence record, doing rescue if needed.
1025+
* Return NAMs sorted by decreasing score.
1026+
*/
1027+
std::vector<Nam> get_nams(
1028+
const KSeq& record,
1029+
const StrobemerIndex& index,
1030+
AlignmentStatistics& statistics,
1031+
Details& details,
1032+
const MappingParameters &map_param,
1033+
const IndexParameters& index_parameters,
1034+
std::minstd_rand& random_engine
1035+
) {
1036+
// Compute randstrobes
1037+
Timer strobe_timer;
1038+
auto query_randstrobes = randstrobes_query(record.seq, index_parameters);
1039+
statistics.n_randstrobes += query_randstrobes.size();
1040+
statistics.tot_construct_strobemers += strobe_timer.duration();
1041+
1042+
// Find NAMs
1043+
Timer nam_timer;
1044+
auto [nonrepetitive_fraction, n_hits, nams] = find_nams(query_randstrobes, index, map_param.use_mcs);
1045+
statistics.tot_find_nams += nam_timer.duration();
1046+
statistics.n_hits += n_hits;
1047+
details.nams = nams.size();
1048+
1049+
// Rescue if requested and needed
1050+
if (map_param.rescue_level > 1 && (nams.empty() || nonrepetitive_fraction < 0.7)) {
1051+
Timer rescue_timer;
1052+
int n_rescue_hits;
1053+
std::tie(n_rescue_hits, nams) = find_nams_rescue(query_randstrobes, index, map_param.rescue_cutoff, map_param.use_mcs);
1054+
statistics.n_rescue_hits += n_rescue_hits;
1055+
details.rescue_nams = nams.size();
1056+
details.nam_rescue = true;
1057+
statistics.tot_time_rescue += rescue_timer.duration();
1058+
}
1059+
1060+
// Sort by score
1061+
Timer nam_sort_timer;
1062+
std::sort(nams.begin(), nams.end(), by_score<Nam>);
1063+
shuffle_top_nams(nams, random_engine);
1064+
statistics.tot_sort_nams += nam_sort_timer.duration();
1065+
1066+
#ifdef TRACE
1067+
std::cerr << "Query: " << record.name << '\n';
1068+
std::cerr << "Found " << nams.size() << " NAMs\n";
1069+
for (const auto& nam : nams) {
1070+
std::cerr << "- " << nam << '\n';
1071+
}
1072+
#endif
1073+
1074+
return nams;
1075+
}
1076+
10221077
void align_or_map_paired(
10231078
const KSeq &record1,
10241079
const KSeq &record2,
@@ -1039,46 +1094,11 @@ void align_or_map_paired(
10391094

10401095
for (size_t is_r1 : {0, 1}) {
10411096
const auto& record = is_r1 == 0 ? record1 : record2;
1042-
1043-
Timer strobe_timer;
1044-
auto query_randstrobes = randstrobes_query(record.seq, index_parameters);
1045-
statistics.n_randstrobes += query_randstrobes.size();
1046-
statistics.tot_construct_strobemers += strobe_timer.duration();
1047-
1048-
// Find NAMs
1049-
Timer nam_timer;
1050-
auto [nonrepetitive_fraction, n_hits, nams] = find_nams(query_randstrobes, index, map_param.use_mcs);
1051-
statistics.tot_find_nams += nam_timer.duration();
1052-
statistics.n_hits += n_hits;
1053-
details[is_r1].nams = nams.size();
1054-
1055-
if (map_param.rescue_level > 1 && (nams.empty() || nonrepetitive_fraction < 0.7)) {
1056-
Timer rescue_timer;
1057-
int n_rescue_hits;
1058-
std::tie(n_rescue_hits, nams) = find_nams_rescue(query_randstrobes, index, map_param.rescue_cutoff, map_param.use_mcs);
1059-
details[is_r1].nam_rescue = true;
1060-
details[is_r1].rescue_nams = nams.size();
1061-
statistics.n_rescue_hits += n_rescue_hits;
1062-
statistics.tot_time_rescue += rescue_timer.duration();
1063-
}
1064-
Timer nam_sort_timer;
1065-
std::sort(nams.begin(), nams.end(), by_score<Nam>);
1066-
shuffle_top_nams(nams, random_engine);
1067-
statistics.tot_sort_nams += nam_sort_timer.duration();
1068-
nams_pair[is_r1] = nams;
1069-
}
1070-
10711097
#ifdef TRACE
1072-
for (int is_r1 : {0, 1}) {
1073-
const auto& record = is_r1 == 0 ? record1 : record2;
1074-
std::cerr << "R" << is_r1 + 1 << " name: " << record.name << '\n';
1075-
const auto& nams = nams_pair[is_r1];
1076-
std::cerr << "Found " << nams.size() << " NAMs\n";
1077-
for (auto& nam : nams) {
1078-
std::cerr << "- " << nam << '\n';
1079-
}
1080-
}
1098+
std::cerr << "R" << is_r1 + 1 << '\n';
10811099
#endif
1100+
nams_pair[is_r1] = get_nams(record, index, statistics, details[is_r1], map_param, index_parameters, random_engine);
1101+
}
10821102

10831103
Timer extend_timer;
10841104
if (map_param.output_format != OutputFormat::SAM) { // PAF or abundance
@@ -1182,40 +1202,7 @@ void align_or_map_single(
11821202
std::vector<double> &abundances
11831203
) {
11841204
Details details;
1185-
Timer strobe_timer;
1186-
auto query_randstrobes = randstrobes_query(record.seq, index_parameters);
1187-
statistics.n_randstrobes += query_randstrobes.size();
1188-
statistics.tot_construct_strobemers += strobe_timer.duration();
1189-
1190-
// Find NAMs
1191-
Timer nam_timer;
1192-
auto [nonrepetitive_fraction, n_hits, nams] = find_nams(query_randstrobes, index, map_param.use_mcs);
1193-
statistics.tot_find_nams += nam_timer.duration();
1194-
statistics.n_hits += n_hits;
1195-
details.nams = nams.size();
1196-
1197-
if (map_param.rescue_level > 1 && (nams.empty() || nonrepetitive_fraction < 0.7)) {
1198-
Timer rescue_timer;
1199-
int n_rescue_hits;
1200-
std::tie(n_rescue_hits, nams) = find_nams_rescue(query_randstrobes, index, map_param.rescue_cutoff, map_param.use_mcs);
1201-
statistics.n_rescue_hits += n_rescue_hits;
1202-
details.rescue_nams = nams.size();
1203-
details.nam_rescue = true;
1204-
statistics.tot_time_rescue += rescue_timer.duration();
1205-
}
1206-
1207-
Timer nam_sort_timer;
1208-
std::sort(nams.begin(), nams.end(), by_score<Nam>);
1209-
shuffle_top_nams(nams, random_engine);
1210-
statistics.tot_sort_nams += nam_sort_timer.duration();
1211-
1212-
#ifdef TRACE
1213-
std::cerr << "Query: " << record.name << '\n';
1214-
std::cerr << "Found " << nams.size() << " NAMs\n";
1215-
for (auto& nam : nams) {
1216-
std::cerr << "- " << nam << '\n';
1217-
}
1218-
#endif
1205+
std::vector<Nam> nams = get_nams(record, index, statistics, details, map_param, index_parameters, random_engine);
12191206

12201207
Timer extend_timer;
12211208
size_t n_best = 0;

0 commit comments

Comments
 (0)