Skip to content
This repository has been archived by the owner on Jul 17, 2023. It is now read-only.

Commit

Permalink
MANTA-1398 Address review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
x-chen committed Nov 7, 2018
1 parent e184f84 commit 296ad36
Show file tree
Hide file tree
Showing 17 changed files with 35 additions and 42 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
## Unreleased

### Added
- Add a configurable option to allow overlapping reads considered as evidence (MANTA-1398)
- Add a configurable option to allow overlapping pairs to be used as evidence (MANTA-1398)
- The option is available in the configure file configureManta.py.ini

## v1.4.0 - 2018-04-25

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ isLowQualitySpanningSVAlignment(
apath_limit_ref_length(maxQCRefSpan,apath);

const unsigned readSize(apath_read_length(apath));
const unsigned clipSize(apath_soft_clip_trail_size(apath));
const unsigned clipSize(apath_soft_clip_right_size(apath));

assert(clipSize <= readSize);

Expand Down Expand Up @@ -393,7 +393,7 @@ isLowQualitySmallSVAlignment(
}

const unsigned pathSize(apath_read_length(apath));
const unsigned clipSize(apath_soft_clip_trail_size(apath));
const unsigned clipSize(apath_soft_clip_right_size(apath));

assert(clipSize <= pathSize);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,11 @@ realignPairedRead(
{
if (isLeftOfInsert)
{
clipSize=apath_soft_clip_trail_size(readPath);
clipSize=apath_soft_clip_right_size(readPath);
}
else
{
clipSize=apath_soft_clip_lead_size(readPath);
clipSize=apath_soft_clip_left_size(readPath);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/c++/lib/blt_util/align_path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ unalignedSuffixSize(const path_t& apath)


unsigned
apath_soft_clip_lead_size(const path_t& apath)
apath_soft_clip_left_size(const path_t& apath)
{
unsigned val(0);
for (const path_segment& ps : apath)
Expand All @@ -241,7 +241,7 @@ apath_soft_clip_lead_size(const path_t& apath)


unsigned
apath_soft_clip_trail_size(const path_t& apath)
apath_soft_clip_right_size(const path_t& apath)
{
unsigned val(0);
BOOST_REVERSE_FOREACH(const path_segment& ps, apath)
Expand Down
4 changes: 2 additions & 2 deletions src/c++/lib/blt_util/align_path.hh
Original file line number Diff line number Diff line change
Expand Up @@ -249,11 +249,11 @@ unalignedSuffixSize(const path_t& apath);

/// how much soft_clip occurs before the first aligned base?
unsigned
apath_soft_clip_lead_size(const path_t& apath);
apath_soft_clip_left_size(const path_t& apath);

/// how much soft_clip occurs after the last aligned base?
unsigned
apath_soft_clip_trail_size(const path_t& apath);
apath_soft_clip_right_size(const path_t& apath);

/// how much clip (soft or hard) occurs before the first aligned base?
unsigned
Expand Down
2 changes: 1 addition & 1 deletion src/c++/lib/blt_util/test/align_path_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ BOOST_AUTO_TEST_CASE( test_apath_clip_trail )
ALIGNPATH::path_t path;
cigar_to_apath(testCigar.c_str(), path);

BOOST_REQUIRE_EQUAL(apath_soft_clip_trail_size(path),3u);
BOOST_REQUIRE_EQUAL(apath_soft_clip_right_size(path),3u);
}

BOOST_AUTO_TEST_SUITE_END()
14 changes: 7 additions & 7 deletions src/c++/lib/htsapi/bam_record_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,14 @@ is_adapter_pair(
const SimpleAlignment mate(getKnownOrFakedMateAlignment(bamRead));
if (aln.is_fwd_strand)
{
unsigned const endpos = aln.pos + apath_ref_length(aln.path) + apath_soft_clip_trail_size(aln.path);
unsigned const mateStartPos = mate.pos + apath_ref_length(mate.path) + apath_soft_clip_trail_size(mate.path);
unsigned const endpos = aln.pos + apath_ref_length(aln.path) + apath_soft_clip_right_size(aln.path);
unsigned const mateStartPos = mate.pos + apath_ref_length(mate.path) + apath_soft_clip_right_size(mate.path);
return (endpos > mateStartPos);
}
else
{
unsigned const endpos = aln.pos - apath_soft_clip_lead_size(aln.path);
unsigned const mateStartPos = mate.pos - apath_soft_clip_lead_size(mate.path);
unsigned const endpos = aln.pos - apath_soft_clip_left_size(aln.path);
unsigned const mateStartPos = mate.pos - apath_soft_clip_left_size(mate.path);
return (endpos < mateStartPos);
}
}
Expand All @@ -100,8 +100,8 @@ is_adapter_pair(
// If we do not have mate cigar information use an aggressive heuristic:
// if the read contains soft clip on the 3' end, it likely runs into adapter.
unsigned const softClipSize(aln.is_fwd_strand ?
apath_soft_clip_trail_size(aln.path) :
apath_soft_clip_lead_size(aln.path));
apath_soft_clip_right_size(aln.path) :
apath_soft_clip_left_size(aln.path));
return (softClipSize > 0);
}
}
Expand All @@ -116,7 +116,7 @@ is_overlapping_pair(
if (! is_mapped_chrom_pair(bamRead)) return false;
if (bamRead.is_fwd_strand() == bamRead.is_mate_fwd_strand()) return false;

static const int reverseOrientDist(bamRead.read_size());
const int reverseOrientDist(bamRead.read_size());
int posDiff(bamRead.pos() - bamRead.mate_pos());
if (! bamRead.is_fwd_strand())
{
Expand Down
8 changes: 0 additions & 8 deletions src/c++/lib/htsapi/bam_record_util.hh
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,6 @@ bool
is_innie_pair(
const bam_record& bam_read);

/// Rough heuristic to detect cases where paired-end reads overlap in such a way as to suggest
/// a possible unfiltered read into adapter sequence (assuming innie pairs).
/// Used as an aggressive check for candidate/hypothesis generation to avoid large number of
/// spurious small indels for short insert data that are derived from adapter k-mers
bool
is_possible_adapter_pair(
const bam_record& bamRead);

/// Based on pair alignment information (MC tag) if available, detect cases where a read extends
/// past the beginning of its mate, suggesting the presence of unfiltered adapter sequence
/// If MC information is not available, use a more aggressive heuristic (any 3' softclipping)
Expand Down
2 changes: 1 addition & 1 deletion src/c++/lib/manta/SVCandidateAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ getBreakendReads(
unsigned leadingMismatchLen(0);
unsigned trailingMismatchLen(0);
getSVBreakendCandidateSemiAlignedSimple(bamRead, bamAlign, refSeq,
_scanOpt.isUseOverlappingPair,
_scanOpt.useOverlapPairEvidence,
leadingMismatchLen, trailingMismatchLen);

if (isSearchForRightOpen)
Expand Down
4 changes: 2 additions & 2 deletions src/c++/lib/manta/SVLocusScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ getSVCandidatesFromSemiAligned(
unsigned trailingMismatchLen(0);
pos_t leadingRefPos(0), trailingRefPos(0);
getSVBreakendCandidateSemiAligned(bamRead, bamAlign, refSeq,
opt.isUseOverlappingPair,
opt.useOverlapPairEvidence,
leadingMismatchLen, leadingRefPos,
trailingMismatchLen, trailingRefPos);

Expand Down Expand Up @@ -1609,7 +1609,7 @@ isSemiAlignedEvidence(
{
unsigned leadingMismatchLen(0), trailingMismatchLen(0);
getSVBreakendCandidateSemiAlignedSimple(bamRead, bamAlign, refSeq,
_opt.isUseOverlappingPair,
_opt.useOverlapPairEvidence,
leadingMismatchLen, trailingMismatchLen);
return ((leadingMismatchLen >= _opt.minSemiAlignedMismatchLen) || (trailingMismatchLen >= _opt.minSemiAlignedMismatchLen));
}
Expand Down
4 changes: 2 additions & 2 deletions src/c++/lib/manta/SVLocusScannerSemiAligned.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ getSVBreakendCandidateSemiAligned(
const bam_record& bamRead,
const SimpleAlignment& bamAlign,
const reference_contig_segment& refSeq,
const bool isUseOverlappingPair,
const bool useOverlapPairEvidence,
unsigned& leadingEdgePoorAlignmentLength,
pos_t& leadingEdgeRefPos,
unsigned& trailingEdgePoorAlignmentLength,
Expand All @@ -244,7 +244,7 @@ getSVBreakendCandidateSemiAligned(
const bool isOverlappingReadPair(is_overlapping_pair(bamRead, bamAlign));
if (isOverlappingReadPair)
{
if ((! isUseOverlappingPair) || (is_adapter_pair(bamRead))) return;
if ((! useOverlapPairEvidence) || (is_adapter_pair(bamRead))) return;
}

using namespace ALIGNPATH;
Expand Down
10 changes: 5 additions & 5 deletions src/c++/lib/manta/SVLocusScannerSemiAligned.hh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
///
/// To do so the following steps are taken:
/// 1. Reads where there is suspected adaptor 'read-through' due to short fragments size are filtered out.
/// 2. Reads which overlap their mate read are optionally filtered out (per function argument \p isUseOverlappingPair).
/// 2. Reads which overlap their mate read are optionally filtered out (per function argument \p useOverlapPairEvidence).
/// 3. Any soft-clipped segments in the input reed are 'unrolled' to the match state.
/// 4. The length of poorly aligned sequence on each read edge is found.
/// 5. Cases where the entire read is poorly aligned are filtered out.
Expand Down Expand Up @@ -69,7 +69,7 @@
/// \param[in] bamAlign A simplified representation of the bamRead's alignment. This information can be derived from
/// bamRead but is provided as a (presumed) cache optimization.
/// \param[in] refSeq Local reference sequence
/// \param[in] isUseOverlappingPair When false, filter out read pairs which overlap.
/// \param[in] useOverlapPairEvidence When false, filter out read pairs which overlap.
/// \param[in] isAggressiveAdaptorCheck When true, filter out read pairs that might run into adapters based on an aggressive check
/// \param[out] leadingEdgePoorAlignmentLength Length of possible breakend-associated poor alignment on the read's leading edge.
/// \param[out] leadingEdgeRefPos The reference position of the first aligned base after the leading poorly aligned segment.
Expand All @@ -85,7 +85,7 @@ getSVBreakendCandidateSemiAligned(
const bam_record& bamRead,
const SimpleAlignment& bamAlign,
const reference_contig_segment& refSeq,
const bool isUseOverlappingPair,
const bool useOverlapPairEvidence,
unsigned& leadingEdgePoorAlignmentLength,
pos_t& leadingEdgeRefPos,
unsigned& trailingEdgePoorAlignmentLength,
Expand All @@ -104,14 +104,14 @@ getSVBreakendCandidateSemiAlignedSimple(
const bam_record& bamRead,
const SimpleAlignment& bamAlign,
const reference_contig_segment& refSeq,
const bool isUseOverlappingPair,
const bool useOverlapPairEvidence,
unsigned& leadingMismatchLen,
unsigned& trailingMismatchLen)
{
pos_t leadingRefPos(0), trailingRefPos(0);
getSVBreakendCandidateSemiAligned(
bamRead, bamAlign, refSeq,
isUseOverlappingPair,
useOverlapPairEvidence,
leadingMismatchLen, leadingRefPos,
trailingMismatchLen, trailingRefPos);
}
2 changes: 1 addition & 1 deletion src/c++/lib/options/ReadScannerOptions.hh
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ struct ReadScannerOptions
unsigned minSingletonMapqCandidates = 15;

// \brief If true, consider an overlapping read pair as evidence.
bool isUseOverlappingPair = false;
bool useOverlapPairEvidence = false;

/// \brief If true, do not treat reads with the 'proper pair' bit set as SV evidence.
///
Expand Down
2 changes: 1 addition & 1 deletion src/c++/lib/options/ReadScannerOptionsParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ getOptionsDescription(ReadScannerOptions& opt)
"Reads with MAPQ less than this value will be ignored")
("edge-prob", po::value(&opt.breakendEdgeQuantileProb)->default_value(opt.breakendEdgeQuantileProb),
"Breakend range associated with each read will trimmed to expected fragment quantile range [p,(1-p)], p: edge-prob")
("use-overlapping-pair", po::value(&opt.isUseOverlappingPair)->zero_tokens(),
("use-overlapping-pair", po::value(&opt.useOverlapPairEvidence)->zero_tokens(),
"Consider an overlapping read pair as evidence")
("ignore-anom-proper-pair", po::value(&opt.isIgnoreAnomProperPair)->zero_tokens(),
"Disregard anomalous fragment sizes if the BAM record has the proper pair bit set. "
Expand Down
2 changes: 1 addition & 1 deletion src/c++/lib/test/testSVLocusScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ buildTestSVLocusScanner(
{
ReadScannerOptions opts = ReadScannerOptions();
opts.minCandidateVariantSize = minCandidateVariantSizeInput;
opts.isUseOverlappingPair = isRNA;
opts.useOverlapPairEvidence = isRNA;
const ReadScannerOptions& constRefOpts(opts);

TestStatsFileMaker statsFile;
Expand Down
2 changes: 1 addition & 1 deletion src/python/bin/configManta.py.ini
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0

# Set if an overlapping read pair will be considered as evidence
# Set to 0 to skip overlapping read pairs
isUseOverlappingPair = 0
useOverlapPairEvidence = 0
6 changes: 3 additions & 3 deletions src/python/lib/mantaWorkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ def isEnableRemoteReadRetrieval() :
if self.params.isIgnoreAnomProperPair :
hygenCmd.append("--ignore-anom-proper-pair")

if self.params.isUseOverlappingPair:
if self.params.useOverlapPairEvidence:
hygenCmd.append("--use-overlapping-pair")

if self.params.isRNA :
Expand Down Expand Up @@ -807,7 +807,7 @@ def __init__(self,params) :
# normalize boolean option input:
safeSetBool(self.params,"enableRemoteReadRetrievalForInsertionsInGermlineCallingModes")
safeSetBool(self.params,"enableRemoteReadRetrievalForInsertionsInCancerCallingModes")
safeSetBool(self.params,"isUseOverlappingPair")
safeSetBool(self.params,"useOverlapPairEvidence")

# Use RNA option for minCandidate size
if self.params.isRNA:
Expand Down Expand Up @@ -857,7 +857,7 @@ def __init__(self,params) :

# always use overlapping pairs for RNA calling
if (self.params.isRNA) :
self.params.isUseOverlappingPair = True
self.params.useOverlapPairEvidence = True


def setCallMemMb(self) :
Expand Down

0 comments on commit 296ad36

Please sign in to comment.