Skip to content

Commit

Permalink
Some fixes and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mikessh committed Mar 18, 2015
1 parent aa25be8 commit 3bab422
Show file tree
Hide file tree
Showing 17 changed files with 175,730 additions and 296 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*.class
target/
.idea/
*.iml
*.iml
test_output/
7 changes: 1 addition & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1 @@
secure: "Z9/A+tvE+9H+xTyEvqpbpmpeqgKr/fTGGV+YsR8t1/fxe/VVoD2vRvQ2levPLZSbU17nqcDVNdwypntox5yfMZ2L2+kiY4KAjo69xGLNDi4/X1o5oRfj7P1Hwh/ldVRyTZsVaCNK25pJ5K8idOWb3DKfjuCsF+tIsjeOH5/E5f8="
language: java
install:
- mvn install -DskipTests=true -Dgpg.skip=true -q
after_success:
- mvn clean cobertura:cobertura coveralls:report -Dcoveralls.token=${COVERALLS_TOKEN} -q
language: java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import cc.redberry.pipe.Processor;
import com.milaboratory.core.sequence.mutations.Mutations;
import com.milaboratory.core.sequence.nucleotide.NucleotideAlphabet;
import com.milaboratory.oncomigec.ReadSpecific;
import com.milaboratory.oncomigec.core.PipelineBlock;
import com.milaboratory.oncomigec.core.align.processor.Aligner;
Expand Down Expand Up @@ -79,7 +80,7 @@ public AlignerReferenceLibrary getAlignerReferenceLibrary() {
/**
 * Reports the value currently held by the {@code alignedMigs} counter.
 *
 * @return the result of {@code alignedMigs.get()}
 */
public int getAlignedMigs() {
    return this.alignedMigs.get();
}

/**
 * Reports the value currently held by the {@code skippedMigs} counter.
 *
 * @return the result of {@code skippedMigs.get()}
 */
public int getSkippedMigs() {
    return this.skippedMigs.get();
}
Expand All @@ -96,25 +97,36 @@ public int getChimericMigs() {
public String getHeader() {
    // The header is "reference" and "pos" followed by three column groups, each with one
    // column per nucleotide code 0..3: substitutions (bare symbol), insertions ("I:" prefix)
    // and deletions ("D:" prefix). Every column is preceded by a tab.
    StringBuilder substColumns = new StringBuilder(),
            insColumns = new StringBuilder(),
            delColumns = new StringBuilder();

    for (byte code = 0; code < 4; code++) {
        char letter = NucleotideAlphabet.INSTANCE.symbolFromCode(code);
        substColumns.append("\t").append(letter);
        insColumns.append("\tI:").append(letter);
        delColumns.append("\tD:").append(letter);
    }

    return "reference\tpos" + substColumns + insColumns + delColumns;
}

@Override
public String getBody() {
StringBuilder stringBuilder = new StringBuilder();
for (Reference reference : alignerReferenceLibrary.getReferenceLibrary().getReferences()) {
for (int i = 0; i < reference.getSequence().size(); i++) {
stringBuilder.append(reference.getFullName()).append("\t").
append(i + 1);
StringBuilder subst = new StringBuilder(), ins = new StringBuilder(), del = new StringBuilder();
for (byte j = 0; j < 4; j++) {
int insCode = Mutations.createInsertion(i, j), delCode = Mutations.createDeletion(i, j);
MutationsAndCoverage mutationsAndCoverage = alignerReferenceLibrary.getMutationsAndCoverage(reference);
subst.append("\t").append(mutationsAndCoverage.getMajorNucleotideMigCount(i, j));
ins.append("\t").append(mutationsAndCoverage.getMajorIndelMigCount(insCode));
del.append("\t").append(mutationsAndCoverage.getMajorIndelMigCount(delCode));
MutationsAndCoverage mutationsAndCoverage = alignerReferenceLibrary.getMutationsAndCoverage(reference);

if (mutationsAndCoverage.wasUpdated()) {
for (int i = 0; i < reference.getSequence().size(); i++) {
stringBuilder.append(reference.getFullName()).append("\t").
append(i + 1);
StringBuilder subst = new StringBuilder(), ins = new StringBuilder(), del = new StringBuilder();
for (byte j = 0; j < 4; j++) {
int insCode = Mutations.createInsertion(i, j), delCode = Mutations.createDeletion(i, j);

subst.append("\t").append(mutationsAndCoverage.getMajorNucleotideMigCount(i, j));
ins.append("\t").append(mutationsAndCoverage.getMajorIndelMigCount(insCode));
del.append("\t").append(mutationsAndCoverage.getMajorIndelMigCount(delCode));
}
stringBuilder.append(subst).append(ins).append(del).append("\n");
}
stringBuilder.append(subst).append(ins).append(del).append("\n");
}
}
return stringBuilder.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public CorrectedConsensus correct(AlignedConsensus alignedConsensus) {
// Check if we've covered all holes in the reference, discard otherwise
if (mustHaveMutationsCount < mutationFilter.getMustHaveMutationsCount())
return null;

// Collect corrected mutations
totalMutations.append(mutations);

Expand All @@ -127,36 +127,42 @@ public CorrectorReferenceLibrary getCorrectorReferenceLibrary() {

@Override
public String getHeader() {
// Builds the tab-separated header row: five fixed columns (reference, pos, has.reference,
// good.coverage, good.quality) followed by per-nucleotide column groups, one column for
// each nucleotide code 0..3 in every group.
String subst = "", substP = "";
String subst = "", substP = "", substV = "";
for (byte i = 0; i < 4; i++) {
char bp = NucleotideAlphabet.INSTANCE.symbolFromCode(i);
// Bare symbol column, then the ".prob" and ".variant" suffixed columns for this nucleotide.
subst += "\t" + bp;
substP += "\t" + bp + ".prob";
substV += "\t" + bp + ".variant";
}
return "reference\tpos\thas.reference\tgood.coverage\tgood.quality\t" +
subst + substP;
return "reference\tpos\thas.reference\tgood.coverage\tgood.quality" +
subst + substP + substV;
}

@Override
public String getBody() {
StringBuilder stringBuilder = new StringBuilder();
for (Reference reference : correctorReferenceLibrary.getReferenceLibrary().getReferences()) {
MutationFilter mutationFilter = correctorReferenceLibrary.getMutationFilter(reference);
for (int i = 0; i < reference.getSequence().size(); i++) {
stringBuilder.append(reference.getFullName()).append("\t").
append(i).append("\t").
append(mutationFilter.hasReference(i)).append("\t").
append(mutationFilter.goodCoverage(i)).append("\t").
append(mutationFilter.goodQuality(i)).append("\t");

for (byte j = 0; j < 4; j++) {
stringBuilder.append("\t").append(correctorReferenceLibrary.getMajorCount(reference, i, j));
}
for (byte j = 0; j < 4; j++) {
stringBuilder.append("\t").append(1.0 - correctorReferenceLibrary.getPValue(reference, i, j));
}
if (mutationFilter.updated()) {
for (int i = 0; i < reference.getSequence().size(); i++) {
stringBuilder.append(reference.getFullName()).append("\t").
append(i).append("\t").
append(mutationFilter.hasReference(i) ? 1 : 0).append("\t").
append(mutationFilter.goodCoverage(i) ? 1 : 0).append("\t").
append(mutationFilter.goodQuality(i) ? 1 : 0);

for (byte j = 0; j < 4; j++) {
stringBuilder.append("\t").append(correctorReferenceLibrary.getMajorCount(reference, i, j));
}
for (byte j = 0; j < 4; j++) {
stringBuilder.append("\t").append(1.0 - correctorReferenceLibrary.getPValue(reference, i, j));
}
for (byte j = 0; j < 4; j++) {
stringBuilder.append("\t").append(mutationFilter.hasSubstitution(i, j) ? 1 : 0);
}

stringBuilder.append("\n");
stringBuilder.append("\n");
}
}
}
return stringBuilder.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,28 @@ public CorrectorReferenceLibrary(AlignerReferenceLibrary alignerReferenceLibrary
}

private void init() {
Collection<Reference> skippedReferences = new LinkedList<>();

for (Reference reference : references) {
MutationsAndCoverage mutationsAndCoverage =
alignerReferenceLibrary.getMutationsAndCoverage(reference);

if (mutationsAndCoverage.wasUpdated()) {
int n = reference.getSequence().size();
int numberOfMigs = mutationsAndCoverage.getMigCount();
boolean[][] substitutionsByPosition = new boolean[n][4];
int[][] majorSubstitutionCounts = new int[n][4];
int[] majorInsertionCounts = new int[n], majorDeletionCounts = new int[n];
double[][] majorSubstitutionPvalues = new double[n][4];
double[] majorInsertionPvalues = new double[n], majorDeletionPvalues = new double[n];
boolean[] referencePresenceByPosition = new boolean[n],
coverageFilterByPosition = new boolean[n],
qualityFilterByPosition = new boolean[n];

int nMustHaveMutations = 0, nBadBases = 0;
int n = reference.getSequence().size();
int numberOfMigs = mutationsAndCoverage.getMigCount();
boolean[][] substitutionsByPosition = new boolean[n][4];
int[][] majorSubstitutionCounts = new int[n][4];
int[] majorInsertionCounts = new int[n], majorDeletionCounts = new int[n];
double[][] majorSubstitutionPvalues = new double[n][4];
double[] majorInsertionPvalues = new double[n], majorDeletionPvalues = new double[n];
boolean[] referencePresenceByPosition = new boolean[n],
coverageFilterByPosition = new boolean[n],
qualityFilterByPosition = new boolean[n];

int nMustHaveMutations = 0, nBadBases = 0;

boolean good = false, updated = mutationsAndCoverage.wasUpdated();

Set<Integer> indels = new HashSet<>();

if (updated) {

final VariantContainer variantContainer = variantLibrary.getVariantContainer(reference);

Expand Down Expand Up @@ -156,12 +159,12 @@ private void init() {
}

// Is sufficiently covered?
boolean good = (nBadBases / (double) n) <= maxBasePairsMaskedRatio &&
good = (nBadBases / (double) n) <= maxBasePairsMaskedRatio &&
mutationsAndCoverage.getMigCount() >= minMigCount;

// Finally deal with INDELS
// NOTE completely frequency-based for now
Set<Integer> indels = new HashSet<>();
indels = new HashSet<>();
for (Integer indel : mutationsAndCoverage.getMajorIndelCodes()) {
int majorCount = mutationsAndCoverage.getMajorIndelMigCount(indel);
boolean isDeletion = Mutations.isDeletion(indel);
Expand All @@ -182,24 +185,22 @@ private void init() {
majorInsertionPvalues[pos] = Double.NaN;
}

// Mutation filter -

mutationFilterByReference.put(reference, new MutationFilter(substitutionsByPosition,
referencePresenceByPosition, qualityFilterByPosition, coverageFilterByPosition,
indels, good, nMustHaveMutations));
}

// Mutation filter -
mutationFilterByReference.put(reference, new MutationFilter(substitutionsByPosition,
referencePresenceByPosition, qualityFilterByPosition, coverageFilterByPosition,
indels, good, updated, nMustHaveMutations));

majorSubstitutionCountMap.put(reference, majorSubstitutionCounts);
majorInsertionCountMap.put(reference, majorInsertionCounts);
majorDeletionCountMap.put(reference, majorDeletionCounts);
majorSubstitutionCountMap.put(reference, majorSubstitutionCounts);
majorInsertionCountMap.put(reference, majorInsertionCounts);
majorDeletionCountMap.put(reference, majorDeletionCounts);

majorSubstitutionPvalueMap.put(reference, majorSubstitutionPvalues);
majorInsertionPvalueMap.put(reference, majorInsertionPvalues);
majorDeletionPvalueMap.put(reference, majorDeletionPvalues);
} else
skippedReferences.add(reference);
majorSubstitutionPvalueMap.put(reference, majorSubstitutionPvalues);
majorInsertionPvalueMap.put(reference, majorInsertionPvalues);
majorDeletionPvalueMap.put(reference, majorDeletionPvalues);
}

this.references.removeAll(skippedReferences);
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,20 @@
public final class MutationFilter implements Serializable {
private final boolean[][] substitutionMask;
private final boolean[] referenceMask, qualityMask, coverageMask;
private final boolean good;
private final boolean good, updated;
private final int mustHaveMutationsCount;
private final Set<Integer> indels;

public MutationFilter(boolean[][] substitutionMask, boolean[] referenceMask,
boolean[] qualityMask, boolean[] coverageMask, Set<Integer> indels,
boolean good, int mustHaveMutationsCount) {
boolean good, boolean updated, int mustHaveMutationsCount) {
this.substitutionMask = substitutionMask;
this.referenceMask = referenceMask;
this.coverageMask = coverageMask;
this.qualityMask = qualityMask;
this.indels = indels;
this.good = good;
this.updated = updated;
this.mustHaveMutationsCount = mustHaveMutationsCount;
}

Expand Down Expand Up @@ -66,6 +67,10 @@ public boolean good() {
return good;
}

/**
 * Returns the {@code updated} flag that was passed to the constructor.
 *
 * @return the stored {@code updated} value
 */
public boolean updated() {
    return this.updated;
}

/**
 * Returns the must-have mutations count that was passed to the constructor.
 *
 * @return the stored {@code mustHaveMutationsCount} value
 */
public int getMustHaveMutationsCount() {
    return this.mustHaveMutationsCount;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ protected synchronized PMig take(String sampleName, int sizeThreshold) {
}

// Try to overlap reads
// NOTE: the overlapper just creates a pair of reads
// that are non-overlapping and have the overlap region equally distributed between mates;
// this is done for the sake of code conciseness
ReadOverlapper.OverlapResult overlapResult =
readOverlapper.overlap(new PSequencingReadImpl(0, null, null, read1, read2),
barcodeOffset);
Expand All @@ -139,9 +142,11 @@ protected synchronized PMig take(String sampleName, int sizeThreshold) {
read2 = overlapResult.getReadPair().getData(0).getRC();
}
}
// NOTE: Otherwise the checkout processor is a HeaderExtractor.
// For preprocessed data, we have a convention that
// a) the headers of both reads contain the UMI sequence (UMI:seq:qual)
// b) reads are oriented in the correct direction, overlapped if possible, and on the same strand

// Note that we don't need to worry about Illumina RC of mates:
// even if the Overlapper has failed, it performs Illumina RC
readList1.add(read1);
readList2.add(read2);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ public static void main(String[] args) throws Exception {
}

private static final String OPT_HELP_SHORT = "h", OPT_HELP_LONG = "help", OPT_VERSION_SHORT = "v", OPT_VERSION_LONG = "version",
OPT_VERBOSITY = "VERBOSITY", OPT_THREADS = "threads", OPT_LIMIT = "limit",
OPT_VERBOSITY = "verbosity", OPT_THREADS = "threads", OPT_LIMIT = "limit",
OPT_INSTRUMENT = "instrument", OPT_IMPORT_PRESET = "import-preset", OPT_EXPORT_PRESET = "export-preset",
OPT_CLASSIFIER_FILE = "classifier", OPT_INPUT_LONG = "input", OPT_INPUT_SHORT = "I",
OPT_OUTPUT_LONG = "output-path", OPT_OUTPUT_SHORT = "O", OPT_NO_BINARY = "no-binary", OPT_VARIANT_DUMP = "variant-dump";
Expand Down
Loading

0 comments on commit 3bab422

Please sign in to comment.