@@ -1019,6 +1019,61 @@ bool has_shared_substring(const std::string& read_seq, const std::string& ref_se
1019
1019
return false ;
1020
1020
}
1021
1021
1022
+
1023
+ /*
1024
+ * Obtain NAMs for a sequence record, doing rescue if needed.
1025
+ * Return NAMs sorted by decreasing score.
1026
+ */
1027
+ std::vector<Nam> get_nams (
1028
+ const KSeq& record,
1029
+ const StrobemerIndex& index,
1030
+ AlignmentStatistics& statistics,
1031
+ Details& details,
1032
+ const MappingParameters &map_param,
1033
+ const IndexParameters& index_parameters,
1034
+ std::minstd_rand& random_engine
1035
+ ) {
1036
+ // Compute randstrobes
1037
+ Timer strobe_timer;
1038
+ auto query_randstrobes = randstrobes_query (record.seq , index_parameters);
1039
+ statistics.n_randstrobes += query_randstrobes.size ();
1040
+ statistics.tot_construct_strobemers += strobe_timer.duration ();
1041
+
1042
+ // Find NAMs
1043
+ Timer nam_timer;
1044
+ auto [nonrepetitive_fraction, n_hits, nams] = find_nams (query_randstrobes, index , map_param.use_mcs );
1045
+ statistics.tot_find_nams += nam_timer.duration ();
1046
+ statistics.n_hits += n_hits;
1047
+ details.nams = nams.size ();
1048
+
1049
+ // Rescue if requested and needed
1050
+ if (map_param.rescue_level > 1 && (nams.empty () || nonrepetitive_fraction < 0.7 )) {
1051
+ Timer rescue_timer;
1052
+ int n_rescue_hits;
1053
+ std::tie (n_rescue_hits, nams) = find_nams_rescue (query_randstrobes, index , map_param.rescue_cutoff , map_param.use_mcs );
1054
+ statistics.n_rescue_hits += n_rescue_hits;
1055
+ details.rescue_nams = nams.size ();
1056
+ details.nam_rescue = true ;
1057
+ statistics.tot_time_rescue += rescue_timer.duration ();
1058
+ }
1059
+
1060
+ // Sort by score
1061
+ Timer nam_sort_timer;
1062
+ std::sort (nams.begin (), nams.end (), by_score<Nam>);
1063
+ shuffle_top_nams (nams, random_engine);
1064
+ statistics.tot_sort_nams += nam_sort_timer.duration ();
1065
+
1066
+ #ifdef TRACE
1067
+ std::cerr << " Query: " << record.name << ' \n ' ;
1068
+ std::cerr << " Found " << nams.size () << " NAMs\n " ;
1069
+ for (const auto & nam : nams) {
1070
+ std::cerr << " - " << nam << ' \n ' ;
1071
+ }
1072
+ #endif
1073
+
1074
+ return nams;
1075
+ }
1076
+
1022
1077
void align_or_map_paired (
1023
1078
const KSeq &record1,
1024
1079
const KSeq &record2,
@@ -1039,46 +1094,11 @@ void align_or_map_paired(
1039
1094
1040
1095
for (size_t is_r1 : {0 , 1 }) {
1041
1096
const auto & record = is_r1 == 0 ? record1 : record2;
1042
-
1043
- Timer strobe_timer;
1044
- auto query_randstrobes = randstrobes_query (record.seq , index_parameters);
1045
- statistics.n_randstrobes += query_randstrobes.size ();
1046
- statistics.tot_construct_strobemers += strobe_timer.duration ();
1047
-
1048
- // Find NAMs
1049
- Timer nam_timer;
1050
- auto [nonrepetitive_fraction, n_hits, nams] = find_nams (query_randstrobes, index , map_param.use_mcs );
1051
- statistics.tot_find_nams += nam_timer.duration ();
1052
- statistics.n_hits += n_hits;
1053
- details[is_r1].nams = nams.size ();
1054
-
1055
- if (map_param.rescue_level > 1 && (nams.empty () || nonrepetitive_fraction < 0.7 )) {
1056
- Timer rescue_timer;
1057
- int n_rescue_hits;
1058
- std::tie (n_rescue_hits, nams) = find_nams_rescue (query_randstrobes, index , map_param.rescue_cutoff , map_param.use_mcs );
1059
- details[is_r1].nam_rescue = true ;
1060
- details[is_r1].rescue_nams = nams.size ();
1061
- statistics.n_rescue_hits += n_rescue_hits;
1062
- statistics.tot_time_rescue += rescue_timer.duration ();
1063
- }
1064
- Timer nam_sort_timer;
1065
- std::sort (nams.begin (), nams.end (), by_score<Nam>);
1066
- shuffle_top_nams (nams, random_engine);
1067
- statistics.tot_sort_nams += nam_sort_timer.duration ();
1068
- nams_pair[is_r1] = nams;
1069
- }
1070
-
1071
1097
#ifdef TRACE
1072
- for (int is_r1 : {0 , 1 }) {
1073
- const auto & record = is_r1 == 0 ? record1 : record2;
1074
- std::cerr << " R" << is_r1 + 1 << " name: " << record.name << ' \n ' ;
1075
- const auto & nams = nams_pair[is_r1];
1076
- std::cerr << " Found " << nams.size () << " NAMs\n " ;
1077
- for (auto & nam : nams) {
1078
- std::cerr << " - " << nam << ' \n ' ;
1079
- }
1080
- }
1098
+ std::cerr << " R" << is_r1 + 1 << ' \n ' ;
1081
1099
#endif
1100
+ nams_pair[is_r1] = get_nams (record, index , statistics, details[is_r1], map_param, index_parameters, random_engine);
1101
+ }
1082
1102
1083
1103
Timer extend_timer;
1084
1104
if (map_param.output_format != OutputFormat::SAM) { // PAF or abundance
@@ -1182,40 +1202,7 @@ void align_or_map_single(
1182
1202
std::vector<double > &abundances
1183
1203
) {
1184
1204
Details details;
1185
- Timer strobe_timer;
1186
- auto query_randstrobes = randstrobes_query (record.seq , index_parameters);
1187
- statistics.n_randstrobes += query_randstrobes.size ();
1188
- statistics.tot_construct_strobemers += strobe_timer.duration ();
1189
-
1190
- // Find NAMs
1191
- Timer nam_timer;
1192
- auto [nonrepetitive_fraction, n_hits, nams] = find_nams (query_randstrobes, index , map_param.use_mcs );
1193
- statistics.tot_find_nams += nam_timer.duration ();
1194
- statistics.n_hits += n_hits;
1195
- details.nams = nams.size ();
1196
-
1197
- if (map_param.rescue_level > 1 && (nams.empty () || nonrepetitive_fraction < 0.7 )) {
1198
- Timer rescue_timer;
1199
- int n_rescue_hits;
1200
- std::tie (n_rescue_hits, nams) = find_nams_rescue (query_randstrobes, index , map_param.rescue_cutoff , map_param.use_mcs );
1201
- statistics.n_rescue_hits += n_rescue_hits;
1202
- details.rescue_nams = nams.size ();
1203
- details.nam_rescue = true ;
1204
- statistics.tot_time_rescue += rescue_timer.duration ();
1205
- }
1206
-
1207
- Timer nam_sort_timer;
1208
- std::sort (nams.begin (), nams.end (), by_score<Nam>);
1209
- shuffle_top_nams (nams, random_engine);
1210
- statistics.tot_sort_nams += nam_sort_timer.duration ();
1211
-
1212
- #ifdef TRACE
1213
- std::cerr << " Query: " << record.name << ' \n ' ;
1214
- std::cerr << " Found " << nams.size () << " NAMs\n " ;
1215
- for (auto & nam : nams) {
1216
- std::cerr << " - " << nam << ' \n ' ;
1217
- }
1218
- #endif
1205
+ std::vector<Nam> nams = get_nams (record, index , statistics, details, map_param, index_parameters, random_engine);
1219
1206
1220
1207
Timer extend_timer;
1221
1208
size_t n_best = 0 ;
0 commit comments