Skip to content
This repository has been archived by the owner on Jul 17, 2023. It is now read-only.

Commit

Permalink
MANTA-1398 Add a configurable option for overlapping reads
Browse files Browse the repository at this point in the history
  • Loading branch information
x-chen committed Nov 7, 2018
1 parent a7011c8 commit e184f84
Show file tree
Hide file tree
Showing 14 changed files with 39 additions and 30 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## Unreleased

### Added
- Add a configurable option to allow overlapping reads considered as evidence (MANTA-1398)

## v1.4.0 - 2018-04-25

This is a major bugfix update from v1.3.2, featuring improved precision and vcf representation, in addition to minor user friendly improvements.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ runGSC(
EdgeRuntimeTracker edgeTracker(opt.edgeRuntimeFilename);
GSCEdgeStatsManager edgeStatMan(opt.edgeStatsFilename);

const SVLocusScanner readScanner(opt.scanOpt, opt.statsFilename, opt.alignFileOpt.alignmentFilenames, opt.isRNA, !opt.isUnstrandedRNA);
const SVLocusScanner readScanner(opt.scanOpt, opt.statsFilename, opt.alignFileOpt.alignmentFilenames, !opt.isUnstrandedRNA);

SVFinder svFind(opt, readScanner, edgeTracker,edgeStatMan);
MultiJunctionFilter svMJFilter(opt,edgeStatMan);
Expand Down
4 changes: 2 additions & 2 deletions src/c++/lib/htsapi/bam_record_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ is_adapter_pair(
if (aln.is_fwd_strand)
{
unsigned const endpos = aln.pos + apath_ref_length(aln.path) + apath_soft_clip_trail_size(aln.path);
unsigned const mateStartPos = mate.pos + apath_ref_length(mate.path) + apath_soft_clip_lead_size(mate.path);
unsigned const mateStartPos = mate.pos + apath_ref_length(mate.path) + apath_soft_clip_trail_size(mate.path);
return (endpos > mateStartPos);
}
else
{
unsigned const endpos = aln.pos - apath_soft_clip_trail_size(aln.path);
unsigned const endpos = aln.pos - apath_soft_clip_lead_size(aln.path);
unsigned const mateStartPos = mate.pos - apath_soft_clip_lead_size(mate.path);
return (endpos < mateStartPos);
}
Expand Down
2 changes: 1 addition & 1 deletion src/c++/lib/manta/SVCandidateAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ getBreakendReads(
unsigned leadingMismatchLen(0);
unsigned trailingMismatchLen(0);
getSVBreakendCandidateSemiAlignedSimple(bamRead, bamAlign, refSeq,
_readScanner.isUseOverlappingPairs(),
_scanOpt.isUseOverlappingPair,
leadingMismatchLen, trailingMismatchLen);

if (isSearchForRightOpen)
Expand Down
7 changes: 3 additions & 4 deletions src/c++/lib/manta/SVLocusScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ getSVCandidatesFromSemiAligned(
unsigned trailingMismatchLen(0);
pos_t leadingRefPos(0), trailingRefPos(0);
getSVBreakendCandidateSemiAligned(bamRead, bamAlign, refSeq,
dopt.isUseOverlappingPairs,
opt.isUseOverlappingPair,
leadingMismatchLen, leadingRefPos,
trailingMismatchLen, trailingRefPos);

Expand Down Expand Up @@ -1429,10 +1429,9 @@ SVLocusScanner(
const ReadScannerOptions& opt,
const std::string& statsFilename,
const std::vector<std::string>& /*alignmentFilename*/,
const bool isRNA,
const bool isTranscriptStrandKnown) :
_opt(opt),
_dopt(opt, isRNA, isTranscriptStrandKnown)
_dopt(opt, isTranscriptStrandKnown)
{
using namespace illumina::common;

Expand Down Expand Up @@ -1610,7 +1609,7 @@ isSemiAlignedEvidence(
{
unsigned leadingMismatchLen(0), trailingMismatchLen(0);
getSVBreakendCandidateSemiAlignedSimple(bamRead, bamAlign, refSeq,
_dopt.isUseOverlappingPairs,
_opt.isUseOverlappingPair,
leadingMismatchLen, trailingMismatchLen);
return ((leadingMismatchLen >= _opt.minSemiAlignedMismatchLen) || (trailingMismatchLen >= _opt.minSemiAlignedMismatchLen));
}
Expand Down
13 changes: 0 additions & 13 deletions src/c++/lib/manta/SVLocusScanner.hh
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,17 @@ struct ReadScannerDerivOptions
{
ReadScannerDerivOptions(
const ReadScannerOptions& opt,
const bool isRNA,
const bool initIsTranscriptStrandKnown) :
isSmallCandidates(opt.minCandidateVariantSize<=opt.maxCandidateSizeForLocalAssmEvidence),
beforeBreakend(opt.minPairBreakendSize/2),
afterBreakend(opt.minPairBreakendSize-beforeBreakend),
isUseOverlappingPairs(isRNA),
isTranscriptStrandKnown(initIsTranscriptStrandKnown)
{}

const bool isSmallCandidates;
const pos_t beforeBreakend;
const pos_t afterBreakend;

/// TODO standardize the overlapping pair treatment to be the same for DNA/RNA modes, then
/// remove this bit:
const bool isUseOverlappingPairs;

/// \brief True if running in RNA-Seq mode with a stranded input RNA assay
const bool isTranscriptStrandKnown;
};
Expand All @@ -119,7 +113,6 @@ struct SVLocusScanner
const ReadScannerOptions& opt,
const std::string& statsFilename,
const std::vector<std::string>& alignmentFilename,
const bool isRNA,
const bool isTranscriptStrandKnown = false);

/// QC check if the read length implied by cigar string matches the length of read sequence
Expand Down Expand Up @@ -317,12 +310,6 @@ struct SVLocusScanner
LinearScaler<int> largeEventRegionScaler;
};

bool
isUseOverlappingPairs() const
{
return _dopt.isUseOverlappingPairs;
}

private:

/// \brief Classify read pair fragment sizes into a pre-defined size category.
Expand Down
4 changes: 2 additions & 2 deletions src/c++/lib/manta/SVLocusScannerSemiAligned.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ getSVBreakendCandidateSemiAligned(
const bam_record& bamRead,
const SimpleAlignment& bamAlign,
const reference_contig_segment& refSeq,
const bool isUseOverlappingPairs,
const bool isUseOverlappingPair,
unsigned& leadingEdgePoorAlignmentLength,
pos_t& leadingEdgeRefPos,
unsigned& trailingEdgePoorAlignmentLength,
Expand All @@ -244,7 +244,7 @@ getSVBreakendCandidateSemiAligned(
const bool isOverlappingReadPair(is_overlapping_pair(bamRead, bamAlign));
if (isOverlappingReadPair)
{
if ((! isUseOverlappingPairs) || (is_adapter_pair(bamRead))) return;
if ((! isUseOverlappingPair) || (is_adapter_pair(bamRead))) return;
}

using namespace ALIGNPATH;
Expand Down
10 changes: 5 additions & 5 deletions src/c++/lib/manta/SVLocusScannerSemiAligned.hh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
///
/// To do so the following steps are taken:
/// 1. Reads where there is suspected adaptor 'read-through' due to short fragments size are filtered out.
/// 2. Reads which overlap their mate read are optionally filtered out (per function argument \p isUseOverlappingPairs).
/// 2. Reads which overlap their mate read are optionally filtered out (per function argument \p isUseOverlappingPair).
/// 3. Any soft-clipped segments in the input reed are 'unrolled' to the match state.
/// 4. The length of poorly aligned sequence on each read edge is found.
/// 5. Cases where the entire read is poorly aligned are filtered out.
Expand Down Expand Up @@ -69,7 +69,7 @@
/// \param[in] bamAlign A simplified representation of the bamRead's alignment. This information can be derived from
/// bamRead but is provided as a (presumed) cache optimization.
/// \param[in] refSeq Local reference sequence
/// \param[in] isUseOverlappingPairs When false, filter out read pairs which overlap.
/// \param[in] isUseOverlappingPair When false, filter out read pairs which overlap.
/// \param[in] isAggressiveAdaptorCheck When true, filter out read pairs that might run into adapters based on an aggressive check
/// \param[out] leadingEdgePoorAlignmentLength Length of possible breakend-associated poor alignment on the read's leading edge.
/// \param[out] leadingEdgeRefPos The reference position of the first aligned base after the leading poorly aligned segment.
Expand All @@ -85,7 +85,7 @@ getSVBreakendCandidateSemiAligned(
const bam_record& bamRead,
const SimpleAlignment& bamAlign,
const reference_contig_segment& refSeq,
const bool isUseOverlappingPairs,
const bool isUseOverlappingPair,
unsigned& leadingEdgePoorAlignmentLength,
pos_t& leadingEdgeRefPos,
unsigned& trailingEdgePoorAlignmentLength,
Expand All @@ -104,14 +104,14 @@ getSVBreakendCandidateSemiAlignedSimple(
const bam_record& bamRead,
const SimpleAlignment& bamAlign,
const reference_contig_segment& refSeq,
const bool isUseOverlappingPairs,
const bool isUseOverlappingPair,
unsigned& leadingMismatchLen,
unsigned& trailingMismatchLen)
{
pos_t leadingRefPos(0), trailingRefPos(0);
getSVBreakendCandidateSemiAligned(
bamRead, bamAlign, refSeq,
isUseOverlappingPairs,
isUseOverlappingPair,
leadingMismatchLen, leadingRefPos,
trailingMismatchLen, trailingRefPos);
}
3 changes: 1 addition & 2 deletions src/c++/lib/manta/test/SVLocusScannerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,9 @@ BOOST_AUTO_TEST_CASE( test_SemiAlignedReads )

BOOST_AUTO_TEST_CASE( test_getSVCandidatesFromReadIndels )
{
const bool isRNA(false);
const bool isStranded(true);
const ReadScannerOptions opt;
const ReadScannerDerivOptions dopt(opt,isRNA,isStranded);
const ReadScannerDerivOptions dopt(opt,isStranded);

ALIGNPATH::path_t inputPath;
cigar_to_apath("100M2000D100M",inputPath);
Expand Down
3 changes: 3 additions & 0 deletions src/c++/lib/options/ReadScannerOptions.hh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ struct ReadScannerOptions
/// \brief Minimum mapping quality for shadow mate used for candidate assembly and scoring
unsigned minSingletonMapqCandidates = 15;

// \brief If true, consider an overlapping read pair as evidence.
bool isUseOverlappingPair = false;

/// \brief If true, do not treat reads with the 'proper pair' bit set as SV evidence.
///
/// This is typically set true for RNA-Seq analysis, where proper-pair is used to signal intron-spanning pairs.
Expand Down
2 changes: 2 additions & 0 deletions src/c++/lib/options/ReadScannerOptionsParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ getOptionsDescription(ReadScannerOptions& opt)
"Reads with MAPQ less than this value will be ignored")
("edge-prob", po::value(&opt.breakendEdgeQuantileProb)->default_value(opt.breakendEdgeQuantileProb),
"Breakend range associated with each read will trimmed to expected fragment quantile range [p,(1-p)], p: edge-prob")
("use-overlapping-pair", po::value(&opt.isUseOverlappingPair)->zero_tokens(),
"Consider an overlapping read pair as evidence")
("ignore-anom-proper-pair", po::value(&opt.isIgnoreAnomProperPair)->zero_tokens(),
"Disregard anomalous fragment sizes if the BAM record has the proper pair bit set. "
"This flag is typically set for RNA-SEQ analysis where the proper-pair bit is used to indicate an intron-spanning read pair.")
Expand Down
1 change: 1 addition & 0 deletions src/c++/lib/test/testSVLocusScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ buildTestSVLocusScanner(
{
ReadScannerOptions opts = ReadScannerOptions();
opts.minCandidateVariantSize = minCandidateVariantSizeInput;
opts.isUseOverlappingPair = isRNA;
const ReadScannerOptions& constRefOpts(opts);

TestStatsFileMaker statsFile;
Expand Down
4 changes: 4 additions & 0 deletions src/python/bin/configManta.py.ini
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ minPassSomaticScore = 30
# all other calling modes.
enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0

# Set if an overlapping read pair will be considered as evidence
# Set to 0 to skip overlapping read pairs
isUseOverlappingPair = 0
9 changes: 9 additions & 0 deletions src/python/lib/mantaWorkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,10 @@ def isEnableRemoteReadRetrieval() :

if self.params.isIgnoreAnomProperPair :
hygenCmd.append("--ignore-anom-proper-pair")

if self.params.isUseOverlappingPair:
hygenCmd.append("--use-overlapping-pair")

if self.params.isRNA :
hygenCmd.append("--rna")
if self.params.isUnstrandedRNA :
Expand Down Expand Up @@ -803,6 +807,7 @@ def __init__(self,params) :
# normalize boolean option input:
safeSetBool(self.params,"enableRemoteReadRetrievalForInsertionsInGermlineCallingModes")
safeSetBool(self.params,"enableRemoteReadRetrievalForInsertionsInCancerCallingModes")
safeSetBool(self.params,"isUseOverlappingPair")

# Use RNA option for minCandidate size
if self.params.isRNA:
Expand Down Expand Up @@ -850,6 +855,10 @@ def __init__(self,params) :
self.params.isHighDepthFilter = (not (self.params.isExome or self.params.isRNA))
self.params.isIgnoreAnomProperPair = (self.params.isRNA)

# always use overlapping pairs for RNA calling
if (self.params.isRNA) :
self.params.isUseOverlappingPair = True


def setCallMemMb(self) :
"Setup default task memory requirements"
Expand Down

0 comments on commit e184f84

Please sign in to comment.