From be3673a16017dfb89b971c856c247df37ccd0deb Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Wed, 16 Jun 2021 17:04:43 +0200 Subject: [PATCH 1/3] use `stoull` to convert strings to uint64_t numbers --- src/paf.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/paf.cpp b/src/paf.cpp index 111dbd8..918f93c 100644 --- a/src/paf.cpp +++ b/src/paf.cpp @@ -7,16 +7,16 @@ paf_row_t::paf_row_t(const std::string& line) { std::vector fields; tokenize(line, fields, " \t"); query_sequence_name = fields[0]; - query_sequence_length = std::stol(fields[1]); - query_start = std::stol(fields[2]); - query_end = std::stol(fields[3]); + query_sequence_length = std::stoull(fields[1]); + query_start = std::stoull(fields[2]); + query_end = std::stoull(fields[3]); query_target_same_strand = (fields[4] == "+"); target_sequence_name = fields[5]; - target_sequence_length = std::stol(fields[6]); - target_start = std::stol(fields[7]); - target_end = std::stol(fields[8]); - num_matches = std::stol(fields[9]); - alignment_block_length = std::stol(fields[10]); + target_sequence_length = std::stoull(fields[6]); + target_start = std::stoull(fields[7]); + target_end = std::stoull(fields[8]); + num_matches = std::stoull(fields[9]); + alignment_block_length = std::stoull(fields[10]); mapping_quality = std::stoi(fields[11]); // find the cigar in the last fields for (size_t i = 12; i < fields.size(); ++i) { @@ -66,7 +66,7 @@ std::vector> parse_paf_spec(const std::string& std::vector fields; tokenize(file, fields, ":"); if (fields.size() == 2) { - parsed.push_back(std::make_pair(fields.front(), std::stol(fields.back()))); + parsed.push_back(std::make_pair(fields.front(), std::stoull(fields.back()))); } else if (fields.size() == 1) { parsed.push_back(std::make_pair(fields.front(), 0)); } From 52b86ee3421bfd2c17e58d0f62ff96659dac4518 Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Wed, 16 Jun 2021 17:35:07 +0200 Subject: [PATCH 2/3] Check if the
coordinates are reasonable for each PAF row --- src/alignments.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/alignments.cpp b/src/alignments.cpp index 092155c..1a76715 100644 --- a/src/alignments.cpp +++ b/src/alignments.cpp @@ -15,8 +15,15 @@ void paf_worker( std::getline(paf_in, line); paf_more.store(paf_in.good()); paf_in_mutex.unlock(); + // Check if there is something to parse if (line.empty()) break; paf_row_t paf(line); + // Check if the coordinates are reasonable + if (paf.query_sequence_length == 0 || paf.target_sequence_length == 0 || + // Query start (0-based; BED-like; closed) and query end (0-based; BED-like; open) + paf.query_start >= paf.query_sequence_length || paf.query_end > paf.query_sequence_length || + // Target start (0-based; BED-like; closed) and target end (0-based; BED-like; open) + paf.target_start >= paf.target_sequence_length || paf.target_end > paf.target_sequence_length) break; size_t query_idx = seqidx.rank_of_seq_named(paf.query_sequence_name); size_t query_len = seqidx.nth_seq_length(query_idx); size_t target_idx = seqidx.rank_of_seq_named(paf.target_sequence_name); From b6539542a342d98c89615189fc8656f774bb88ea Mon Sep 17 00:00:00 2001 From: AndreaGuarracino Date: Wed, 16 Jun 2021 17:42:32 +0200 Subject: [PATCH 3/3] query/target start has to be lower than query/target end too --- src/alignments.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alignments.cpp b/src/alignments.cpp index 1a76715..a45af0a 100644 --- a/src/alignments.cpp +++ b/src/alignments.cpp @@ -21,9 +21,9 @@ void paf_worker( // Check if the coordinates are reasonable if (paf.query_sequence_length == 0 || paf.target_sequence_length == 0 || // Query start (0-based; BED-like; closed) and query end (0-based; BED-like; open) - paf.query_start >= paf.query_sequence_length || paf.query_end > paf.query_sequence_length || + paf.query_start >= paf.query_sequence_length || paf.query_end > paf.query_sequence_length || paf.query_start >= paf.query_end || // Target start (0-based; BED-like; closed) and target end (0-based; BED-like; open) - paf.target_start >= 
paf.target_sequence_length || paf.target_end > paf.target_sequence_length) break; + paf.target_start >= paf.target_sequence_length || paf.target_end > paf.target_sequence_length || paf.target_start >= paf.target_end) break; size_t query_idx = seqidx.rank_of_seq_named(paf.query_sequence_name); size_t query_len = seqidx.nth_seq_length(query_idx); size_t target_idx = seqidx.rank_of_seq_named(paf.target_sequence_name);