From e35fddbe02a46da061c6198bebfa34be97c1f75d Mon Sep 17 00:00:00 2001 From: evantarbell Date: Sun, 27 Jan 2019 12:29:23 -0500 Subject: [PATCH] bug correct for scoring --- src/HMMR_ATAC/ArgParser.java | 24 ++- src/HMMR_ATAC/HMMRTracksToBedgraph.java | 60 +++++++ src/HMMR_ATAC/Main_HMMR_Driver.java | 202 ++++++++++++++++-------- src/HMMR_ATAC/TrackHolder.java | 5 + 4 files changed, 219 insertions(+), 72 deletions(-) create mode 100644 src/HMMR_ATAC/HMMRTracksToBedgraph.java diff --git a/src/HMMR_ATAC/ArgParser.java b/src/HMMR_ATAC/ArgParser.java index 5eec2bf..79b617d 100644 --- a/src/HMMR_ATAC/ArgParser.java +++ b/src/HMMR_ATAC/ArgParser.java @@ -34,15 +34,23 @@ public class ArgParser { private int vitWindow = 25000000; private File modelFile=null; private boolean stopAfterModel = false; + private boolean printHMMRTracks = false; + private String version; /** * Main constructor * @param an Array of Strings passed from the main program */ - public ArgParser(String[] a){ + public ArgParser(String[] a,String ver){ args = a; + version = ver; set(); } + /** + * Access print hmmr tracks + * @return a boolean to determine whether to print the hmmr decomposed signal tracks + */ + public boolean getPrintHMMRTracks(){return printHMMRTracks;} /** * Access stop after model * @return a boolean to determine whether to stop the program after model generation @@ -361,6 +369,14 @@ private void set(){ modelFile = new File(args[i+1]); i++; break; + case"printTracks": + String TEMP_print = args[i+1].toLowerCase(); + if (TEMP_print.contains("t")){ + printHMMRTracks=true; + } + else{printHMMRTracks=false;} + i++; + break; case"modelonly": String TEMP = args[i+1].toLowerCase(); if (TEMP.contains("t")){ @@ -381,7 +397,8 @@ private void set(){ * Print usage statement */ public void printUsage(){ - System.out.println("Usage: java -jar HMMRATAC_V1.0_exe.jar"); + System.out.println("HMMRATAC Version:"+"\t"+version); + System.out.println("Usage: java -jar HMMRATAC_V#_exe.jar"); System.out.println("\nRequired Parameters:"); System.out.println("\t-b , --bam Sorted BAM file containing the ATAC-seq reads"); System.out.println("\t-i , --index Index file for the sorted BAM File"); @@ -395,7 +412,7 @@ public void printUsage(){ System.out.println("\t-u , --upper Upper limit on fold change range for choosing training sites. Default = 20"); System.out.println("\t-l , --lower Lower limit on fold change range for choosing training sites. Default = 10"); System.out.println("\t-z , --zscore Zscored read depth to mask during Viterbi decoding. Default = 100"); - System.out.println("\t-o , --output Name for output files. Default = NA"); + System.out.println("\t-o , --output Name for output files. Default = NA"); System.out.println("\t-e , --blacklist bed file of blacklisted regions to exclude"); System.out.println("\t-p , --peaks Whether to report peaks in bed format. Default = true"); System.out.println("\t-k , --kmeans Number of States in the model. Default = 3. If not k=3, recommend NOT calling peaks, use bedgraph"); @@ -408,6 +425,7 @@ public void printUsage(){ System.out.println("\t--window Size of the bins to split the genome into for Viterbi decoding.\n\t To save memory, the genome is split into long bins and viterbi decoding occurs across each bin. \n\tDefault = 25000000. Note: For machines with limited memory, it is recomended to reduce the size of the bins."); System.out.println("\t--model Binary model file (generated from previous HMMR run) to use instead of creating new one"); System.out.println("\t--modelonly Whether or not to stop the program after generating model. Default = false"); +// System.out.println("\t--printTracks Whether or not to print the decomposed HMMRATAC signal tracks. Tracks will be labeled as Output_NFR.bedgraph, Output_Mono.bedgraph etc. Default = false"); System.out.println("\t-h , --help Print this help message and exit."); System.exit(0); } diff --git a/src/HMMR_ATAC/HMMRTracksToBedgraph.java b/src/HMMR_ATAC/HMMRTracksToBedgraph.java new file mode 100644 index 0000000..0c3f216 --- /dev/null +++ b/src/HMMR_ATAC/HMMRTracksToBedgraph.java @@ -0,0 +1,60 @@ +package HMMR_ATAC; + +import java.util.ArrayList; + +import FormatConverters.PileupToBedGraph; +import Node.PileupNode2; +import Node.TagNode; + +public class HMMRTracksToBedgraph { + + private ArrayList tracks; + private TagNode interval; + private int step; + + private ArrayList nfr; + private ArrayList mono; + private ArrayList di; + private ArrayList tri; + + public HMMRTracksToBedgraph(ArrayList t, TagNode i,int s){ + tracks = t; + interval = i; + step = s; + run(); + } + + public ArrayList getShort(){return nfr;} + public ArrayList getMono(){return mono;} + public ArrayList getDi(){return di;} + public ArrayList getTri(){return tri;} + + private void run(){ + ArrayList nfr = new ArrayList(); + ArrayList mono = new ArrayList(); + ArrayList di = new ArrayList(); + ArrayList tri = new ArrayList(); + + nfr.addAll(runOneCol(0)); + mono.addAll(runOneCol(1)); + di.addAll(runOneCol(2)); + tri.addAll(runOneCol(3)); + + } + + private ArrayList runOneCol(int c){ + ArrayList temp = new ArrayList(); + int start = interval.getStart(); + ArrayList pile = new ArrayList(); + int remainder = interval.getLength() % step; + int i; + for ( i = 0;i < tracks.size()-1;i++){ + PileupNode2 pNode = new PileupNode2(start+(i*step),tracks.get(i)[c],interval.getChrom()); + pile.add(pNode); + } + PileupNode2 pNode = new PileupNode2(start+(((i)*step)-remainder),tracks.get(i)[c],interval.getChrom()); + pile.add(pNode); + temp.addAll(new PileupToBedGraph(pile,step).getBedGraph()); + return temp; + } +} diff --git a/src/HMMR_ATAC/Main_HMMR_Driver.java b/src/HMMR_ATAC/Main_HMMR_Driver.java index 8581b1e..8cb4bdc 100644 --- a/src/HMMR_ATAC/Main_HMMR_Driver.java +++ b/src/HMMR_ATAC/Main_HMMR_Driver.java @@ -65,13 +65,19 @@ public class Main_HMMR_Driver { private static int vitWindow = 25000000; private static File modelFile; private static boolean stopAfterModel = false; + private static boolean printHMMRTracks = false; private static String trainingRegions; + /* + * Version number. Change as needed + */ + private static String versionNum = "1.2.2"; + @SuppressWarnings("unchecked") public static void main(String[] args) throws IOException { - ArgParser p = new ArgParser(args); + ArgParser p = new ArgParser(args,versionNum); bam = p.getBam(); index = p.getIndex(); genomeFile=p.getGenome(); @@ -95,6 +101,7 @@ public static void main(String[] args) throws IOException { trainingRegions = p.getTrainingRegions(); vitWindow = p.getWindow(); modelFile = p.getModelFile(); +// printHMMRTracks = p.getPrintHMMRTracks(); //For run time calculation Long startTime = System.currentTimeMillis(); @@ -109,6 +116,16 @@ public static void main(String[] args) throws IOException { p.printUsage(); System.exit(1); } + + /* + * Report version into log file + * + */ + log.println("Version:"+"\t"+versionNum); + + /* + * Report all input arguments into log file + */ log.println("Arguments Used:"); for (int i = 0; i < args.length-1;i+=2){ log.println(args[i]+"\t"+args[i+1]); @@ -413,6 +430,16 @@ public static void main(String[] args) throws IOException { * Run viterbi on the whole genome */ //PrintStream out = new PrintStream(output+".pileup"); + PrintStream NFR = null; + PrintStream MONO = null; + PrintStream DI=null; + PrintStream TRI=null; + if(printHMMRTracks){ + NFR = new PrintStream(output+"_nfr.bedgraph"); + MONO = new PrintStream(output+"_mono.bedgraph"); + DI = new PrintStream(output+"_di.bedgraph"); + TRI = new PrintStream(output+"_tri.bedgraph"); + } ArrayList genomeAnnotation = new ArrayList(); for (int i = 0;i < vitBed.size();i++){ if (vitBed.get(i).getLength() >= 10){ @@ -421,7 +448,36 @@ public static void main(String[] args) throws IOException { FragPileupGen vGen = new FragPileupGen(bam, index, tempBed, mode, fragMeans, fragStddevs,minMapQ); TrackHolder vHolder = new TrackHolder(vGen.transformTracks(vGen.getAverageTracks()),trim);// 8/30/16 transformed tracks - //System.out.println(vitBed.get(i).toString()+"\t"+"viterbi bed"); + if (printHMMRTracks){ + HMMRTracksToBedgraph tracks = new HMMRTracksToBedgraph(vHolder.getRawData(),vitBed.get(i),10); + ArrayList nfr = tracks.getShort(); + ArrayList mono = tracks.getMono(); + ArrayList di = tracks.getDi(); + ArrayList tri = tracks.getTri(); + + + if (nfr != null) { + for (int w = 0; w < nfr.size(); w++) { + NFR.println(nfr.get(w).toString2()); + } + } + if (mono != null) { + for (int d = 0; d < mono.size(); d++) { + MONO.println(mono.get(d).toString2()); + } + } + if (di != null) { + for (int e = 0; e < di.size(); e++) { + DI.println(di.get(e).toString2()); + } + } + if (tri != null) { + for (int f = 0; f < tri.size(); f++) { + TRI.println(tri.get(f).toString2()); + } + } + + } vGen = null; RobustHMM HMM = new RobustHMM(vHolder.getObs(),null,hmm,false,0,"Vector",0); @@ -449,7 +505,7 @@ public static void main(String[] args) throws IOException { } } //out.close(); - + NFR.close();MONO.close();DI.close();TRI.close(); /** * Report the final results as peaks, bedgraphs and summits, if desired */ @@ -472,82 +528,90 @@ public static void main(String[] args) throws IOException { for (String chr : hmmrBdg.keySet()){ ArrayList hmmr = hmmrBdg.get(chr); ArrayList signal = bdg.get(chr); - Collections.sort(hmmr, TagNode.basepairComparator); - if (signal != null){ - Collections.sort(signal, TagNode.basepairComparator); - } - int index = 0; - for (int i = 0; i < hmmr.size();i++){ - TagNode temp = hmmr.get(i); + if (signal != null) { + Collections.sort(hmmr, TagNode.basepairComparator); - /** - * Execute the scoring commands if the state is a peak or if bedgraph scoring is on - */ - if ((int)temp.getScore2() == peak || BGScore){ - boolean hasHadOverlap = false; - ArrayList overlaps = new ArrayList(); - for (int a = index;a overlaps = new ArrayList(); + for (int a = index; a < signal.size(); a++) { + if (SubtractBed.overlap(temp, signal.get(a)) + .hasHit()) { + overlaps.add(signal.get(a)); + hasHadOverlap = true; + } else { + if (hasHadOverlap) { + index = a; + break; + } } + } - - } - ScoreNode scores = bedGraphMath.set(temp, overlaps); - if (scoreSys.equals("ave")){temp.setScore3(scores.getMean());} - else if (scoreSys.equals("fc")){temp.setScore3(scores.getMean()/genomeMean);} - else if (scoreSys.equals("zscore")){temp.setScore3((scores.getMean()-genomeMean)/genomeStd);} - else if (scoreSys.equals("med")){temp.setScore3(scores.getMedian());} - else{temp.setScore3(scores.getMax());} - if ((int)temp.getScore2() == peak){ - temp = bedGraphMath.setSmooth(20, temp, overlaps); - temp.setID("Peak_"+counter); - if (i > 0){ - temp.setUpstream(hmmr.get(i-1)); - } - else{ - temp.setUpstream(hmmr.get(i)); - } - if (i < hmmr.size()-1){ - temp.setDownstream(hmmr.get(i+1)); + ScoreNode scores = bedGraphMath.set(temp, overlaps); + if (scoreSys.equals("ave")) { + temp.setScore3(scores.getMean()); + } else if (scoreSys.equals("fc")) { + temp.setScore3(scores.getMean() / genomeMean); + } else if (scoreSys.equals("zscore")) { + temp.setScore3((scores.getMean() - genomeMean) + / genomeStd); + } else if (scoreSys.equals("med")) { + temp.setScore3(scores.getMedian()); + } else { + temp.setScore3(scores.getMax()); } - else{ - temp.setDownstream(hmmr.get(i)); + if ((int) temp.getScore2() == peak) { + temp = bedGraphMath.setSmooth(20, temp, overlaps); + temp.setID("Peak_" + counter); + if (i > 0) { + temp.setUpstream(hmmr.get(i - 1)); + } else { + temp.setUpstream(hmmr.get(i)); + } + if (i < hmmr.size() - 1) { + temp.setDownstream(hmmr.get(i + 1)); + } else { + temp.setDownstream(hmmr.get(i)); + } + counter++; } - counter++; - } - - } - /** - * report the bedgraph, is desired - */ - if (bg){ - if (!BGScore){ - bedgraph.println(temp.toString2()); + } - else{ - bedgraph.println(temp.toString_ScoredBdg()); + /** + * report the bedgraph, is desired + */ + if (bg) { + if (!BGScore) { + bedgraph.println(temp.toString2()); + } else { + bedgraph.println(temp.toString_ScoredBdg()); + } } - } - /** - * report the peaks and summits, if desired - */ - if (peaks && (int)temp.getScore2() == peak && temp.getLength() >= minLength){ - if(temp.getSummit() != null){ - summits.println(temp.getSummit().toString_ScoredSummit()); + /** + * report the peaks and summits, if desired + */ + if (peaks && (int) temp.getScore2() == peak + && temp.getLength() >= minLength) { + if (temp.getSummit() != null) { + summits.println(temp.getSummit() + .toString_ScoredSummit()); + } + pks.println(temp.toString_gappedPeak()); } - pks.println(temp.toString_gappedPeak()); + } - - } - } + + }//for loop through chroms if (bg){ bedgraph.close(); } diff --git a/src/HMMR_ATAC/TrackHolder.java b/src/HMMR_ATAC/TrackHolder.java index dc977f0..fd3f2ed 100644 --- a/src/HMMR_ATAC/TrackHolder.java +++ b/src/HMMR_ATAC/TrackHolder.java @@ -23,6 +23,11 @@ public TrackHolder(ArrayList t,int trim){ tracks = trim(t,trim); //positions = pos; } + /** + * Access the data as an ArrayList of double arrays + * @return An ArrayList of double arrays representing the data + */ + public ArrayList getRawData(){return tracks;} /** * Access the positions */