Skip to content
This repository has been archived by the owner on Jul 17, 2023. It is now read-only.

Commit

Permalink
[MANTA-287]
Browse files Browse the repository at this point in the history
add the unit test of consistency of supporting reads
  • Loading branch information
x-chen committed Apr 12, 2016
1 parent f32702b commit 2ac0b3c
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 47 deletions.
89 changes: 44 additions & 45 deletions src/c++/lib/assembly/SmallAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@


// compile with this macro to get verbose output:
#define DEBUG_ASBL
//#define DEBUG_ASBL


// stream used by DEBUG_ASBL:
Expand Down Expand Up @@ -183,32 +183,32 @@ walk(const SmallAssemblerOptions& opt,
contig.seq = seed;

// collecting rejecting reads for the seed from the unselected branches
for (const char symbol : opt.alphabet)
{
// the seed itself
if (symbol == seed[wordLength-1]) continue;
for (const char symbol : opt.alphabet)
{
// the seed itself
if (symbol == seed[wordLength-1]) continue;

// add rejecting reads from an unselected word/branch
const std::string tmpBack = getEnd(seed, wordLength-1, false);
const std::string newKey(addBase(tmpBack, symbol, true));
// add rejecting reads from an unselected word/branch
const std::string tmpBack = getEnd(seed, wordLength-1, false);
const std::string newKey(addBase(tmpBack, symbol, true));
#ifdef DEBUG_ASBL
log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
#endif

wordReadsIter= wordReads.find(newKey);
if (wordReadsIter == wordReadsEnd) continue;
const std::set<unsigned>& unselectedReads(wordReadsIter->second);
wordReadsIter= wordReads.find(newKey);
if (wordReadsIter == wordReadsEnd) continue;
const std::set<unsigned>& unselectedReads(wordReadsIter->second);
#ifdef DEBUG_ASBL
log_os << "Supporting reads for the backwards word : ";
print_readSet(unselectedReads);
log_os << "Supporting reads for the backwards word : ";
print_readSet(unselectedReads);
#endif

contig.rejectReads.insert(unselectedReads.begin(), unselectedReads.end());
contig.rejectReads.insert(unselectedReads.begin(), unselectedReads.end());
#ifdef DEBUG_ASBL
log_os << "seed's rejecting reads : ";
print_readSet(contig.rejectReads);
log_os << "seed's rejecting reads : ";
print_readSet(contig.rejectReads);
#endif
}
}

seenEdgeBefore.clear();
seenEdgeBefore.insert(seed);
Expand All @@ -224,8 +224,8 @@ walk(const SmallAssemblerOptions& opt,

while (true)
{
const std::string previousWord = getEnd(contig.seq, wordLength, isEnd);
const std::string trunk(getEnd(contig.seq, wordLength-1, isEnd));
const std::string previousWord = getEnd(contig.seq, wordLength, isEnd);
const std::string trunk(getEnd(contig.seq, wordLength-1, isEnd));

#ifdef DEBUG_ASBL
log_os << "# current contig : " << contig.seq << " size : " << contig.seq.size() << "\n"
Expand Down Expand Up @@ -345,35 +345,35 @@ walk(const SmallAssemblerOptions& opt,

// TODO: can add threshold for the count or percentage of shared reads
{
// walk backwards for one step at a branching point
if (maxWordReads != previousWordReads)
{
const char tmpSymbol = (isEnd? previousWord[0] : previousWord[wordLength-1]);
for (const char symbol : opt.alphabet)
{
// the selected branch
if (symbol == tmpSymbol) continue;
// walk backwards for one step at a branching point
if (maxWordReads != previousWordReads)
{
const char tmpSymbol = (isEnd? previousWord[0] : previousWord[wordLength-1]);
for (const char symbol : opt.alphabet)
{
// the selected branch
if (symbol == tmpSymbol) continue;

// add rejecting reads from an unselected branch
const std::string newKey(addBase(trunk, symbol, !isEnd));
// add rejecting reads from an unselected branch
const std::string newKey(addBase(trunk, symbol, !isEnd));
#ifdef DEBUG_ASBL
log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
log_os << "Extending end backwords: base " << symbol << " " << newKey << "\n";
#endif
wordReadsIter= wordReads.find(newKey);
if (wordReadsIter == wordReadsEnd) continue;
const std::set<unsigned>& backWordReads(wordReadsIter->second);
wordReadsIter= wordReads.find(newKey);
if (wordReadsIter == wordReadsEnd) continue;
const std::set<unsigned>& backWordReads(wordReadsIter->second);
#ifdef DEBUG_ASBL
log_os << "Supporting reads for the backwards word : ";
print_readSet(backWordReads);
log_os << "Supporting reads for the backwards word : ";
print_readSet(backWordReads);
#endif
rejectReads2Add.insert(backWordReads.begin(), backWordReads.end());
rejectReads2Add.insert(backWordReads.begin(), backWordReads.end());
#ifdef DEBUG_ASBL
log_os << "rejectReads2Add upated : ";
print_readSet(rejectReads2Add);
log_os << "rejectReads2Add upated : ";
print_readSet(rejectReads2Add);
#endif
}
}
previousWordReads = maxWordReads;
}
}
previousWordReads = maxWordReads;

#ifdef DEBUG_ASBL
log_os << "Adding rejecting reads " << "\n"
Expand Down Expand Up @@ -604,13 +604,12 @@ buildContigs(

while (! maxWords.empty())
{
maxWord=(*maxWords.begin());
maxWords.erase(maxWords.begin());
#ifdef DEBUG_ASBL
log_os << logtag << "Seeding kmer : " << maxWord << "\n";
#endif

maxWord=(*maxWords.begin());
maxWords.erase(maxWords.begin());

AssembledContig newContig;
walk(opt, maxWord, wordLength, wordCount, wordSupportReads, seenEdgeBefore, newContig);

Expand Down
52 changes: 52 additions & 0 deletions src/c++/lib/assembly/test/SmallAssemblerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,57 @@ BOOST_AUTO_TEST_CASE( test_PoisonRead )
}



BOOST_AUTO_TEST_CASE( test_supportingReadConsistency )
{
// test against observed case where a single bad read could kill the whole assembly

SmallAssemblerOptions assembleOpt;

assembleOpt.minWordLength = 6;
assembleOpt.maxWordLength = 6;
assembleOpt.minCoverage = 2;
assembleOpt.minSeedReads = 3;

AssemblyReadInput reads;
reads.push_back( "AAACGTGTATTA");
reads.push_back( "ACGTGTATTACC");
reads.push_back( "CGTGTATTACCT");
reads.push_back( "GTGTATTACCTA");
reads.push_back( "ATTACCTAGTAC");
reads.push_back( "TACCTAGTACTC");
// the above reads build a contig ACGTG TATTACC TAGTAC
//
// Notice ACGTG should not be extended by adding 'A' to the left => AACGTG
// using the reads below, because they have a different suffix after ACGTG *GCC*
// Instead, the reads below build a contig CTTA GCTA ACGTG GCC
reads.push_back("CCCTTAGCTAAC");
reads.push_back( "CTTAGCTAACGT");
reads.push_back( "TAGCTAACGTGG");
reads.push_back( "GCTAACGTGGCC");
reads.push_back( "AACGTGGCCTAG");


AssemblyReadOutput readInfo;
Assembly contigs;

runSmallAssembler(assembleOpt, reads, readInfo, contigs);

BOOST_REQUIRE_EQUAL(contigs.size(),2u);
BOOST_REQUIRE_EQUAL(contigs[0].seq,"AACGTGTATTACCTAGTAC");
BOOST_REQUIRE_EQUAL(contigs[1].seq,"CTTAGCTAACGTGGCC");
for (unsigned i(0); i<6; ++i)
{
BOOST_REQUIRE(readInfo[i].isUsed);
BOOST_REQUIRE_EQUAL(readInfo[i].contigId,0u);
}

for (unsigned i(6); i<11; ++i)
{
BOOST_REQUIRE(readInfo[i].isUsed);
BOOST_REQUIRE_EQUAL(readInfo[i].contigId,1u);
}
}

BOOST_AUTO_TEST_SUITE_END()

2 changes: 1 addition & 1 deletion src/c++/lib/manta/SVCandidateAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#include <iostream>

//#define DEBUG_REMOTES
#define DEBUG_ASBL
//#define DEBUG_ASBL


static
Expand Down
2 changes: 1 addition & 1 deletion src/c++/lib/svgraph/SVLocusNode.hh
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ typedef unsigned NodeIndexType;
#if 0
class customConstEdgeIterator
: public boost::iterator_adaptor<
customConstEdgeIterator // Derived
customConstEdgeIterator // Derived
, Finite_vertices_iterator // Base
, Vertex_handle // Value
, boost::forward_traversal_tag // Traversal type
Expand Down

0 comments on commit 2ac0b3c

Please sign in to comment.