Bug fix for scoring
evantarbell committed Jan 27, 2019
1 parent 30248db commit e35fddb
Showing 4 changed files with 219 additions and 72 deletions.
24 changes: 21 additions & 3 deletions src/HMMR_ATAC/ArgParser.java
@@ -34,15 +34,23 @@ public class ArgParser {
private int vitWindow = 25000000;
private File modelFile=null;
private boolean stopAfterModel = false;
private boolean printHMMRTracks = false;
private String version;

/**
* Main constructor
* @param a an array of Strings passed from the main program
* @param ver the version number, reported in the usage statement
*/
public ArgParser(String[] a){
public ArgParser(String[] a,String ver){
args = a;
version = ver;
set();
}
/**
* Access print hmmr tracks
* @return a boolean to determine whether to print the hmmr decomposed signal tracks
*/
public boolean getPrintHMMRTracks(){return printHMMRTracks;}
/**
* Access stop after model
* @return a boolean to determine whether to stop the program after model generation
@@ -361,6 +369,14 @@ private void set(){
modelFile = new File(args[i+1]);
i++;
break;
case "printTracks":
String TEMP_print = args[i+1].toLowerCase();
if (TEMP_print.contains("t")){
printHMMRTracks=true;
}
else{printHMMRTracks=false;}
i++;
break;
case "modelonly":
String TEMP = args[i+1].toLowerCase();
if (TEMP.contains("t")){
@@ -381,7 +397,8 @@ private void set(){
* Print usage statement
*/
public void printUsage(){
System.out.println("Usage: java -jar HMMRATAC_V1.0_exe.jar");
System.out.println("HMMRATAC Version:"+"\t"+version);
System.out.println("Usage: java -jar HMMRATAC_V#_exe.jar");
System.out.println("\nRequired Parameters:");
System.out.println("\t-b , --bam <BAM> Sorted BAM file containing the ATAC-seq reads");
System.out.println("\t-i , --index <BAI> Index file for the sorted BAM File");
@@ -395,7 +412,7 @@ public void printUsage(){
System.out.println("\t-u , --upper <int> Upper limit on fold change range for choosing training sites. Default = 20");
System.out.println("\t-l , --lower <int> Lower limit on fold change range for choosing training sites. Default = 10");
System.out.println("\t-z , --zscore <int> Zscored read depth to mask during Viterbi decoding. Default = 100");
System.out.println("\t-o , --output <File> Name for output files. Default = NA");
System.out.println("\t-o , --output <String> Name for output files. Default = NA");
System.out.println("\t-e , --blacklist <BED_File> bed file of blacklisted regions to exclude");
System.out.println("\t-p , --peaks <true || false> Whether to report peaks in bed format. Default = true");
System.out.println("\t-k , --kmeans <int> Number of States in the model. Default = 3. If not k=3, recommend NOT calling peaks, use bedgraph");
@@ -408,6 +425,7 @@ public void printUsage(){
System.out.println("\t--window <int> Size of the bins to split the genome into for Viterbi decoding.\n\t To save memory, the genome is split into <int> long bins and Viterbi decoding occurs across each bin. \n\tDefault = 25000000. Note: For machines with limited memory, it is recommended to reduce the size of the bins.");
System.out.println("\t--model <File> Binary model file (generated from previous HMMR run) to use instead of creating new one");
System.out.println("\t--modelonly <true || false> Whether or not to stop the program after generating model. Default = false");
// System.out.println("\t--printTracks <true || false> Whether or not to print the decomposed HMMRATAC signal tracks. Tracks will be labeled as Output_NFR.bedgraph, Output_Mono.bedgraph etc. Default = false");
System.out.println("\t-h , --help Print this help message and exit.");
System.exit(0);
}
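A minimal, hypothetical sketch (not part of the commit) of the boolean-flag convention that the new "printTracks" case follows, matching the existing "modelonly" case: the next argument is lower-cased and any value containing the letter 't' switches the flag on. The class and method names below are illustrative only.

public class PrintTracksFlagSketch {

    // Same test as the switch cases above: "true", "T", "t" all enable the flag.
    static boolean parseBooleanFlag(String value) {
        return value.toLowerCase().contains("t");
    }

    public static void main(String[] args) {
        System.out.println(parseBooleanFlag("True"));   // true
        System.out.println(parseBooleanFlag("false"));  // false ("false" has no 't')
        System.out.println(parseBooleanFlag("1"));      // false
    }
}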
60 changes: 60 additions & 0 deletions src/HMMR_ATAC/HMMRTracksToBedgraph.java
@@ -0,0 +1,60 @@
package HMMR_ATAC;

import java.util.ArrayList;

import FormatConverters.PileupToBedGraph;
import Node.PileupNode2;
import Node.TagNode;

/**
* Decompose the raw HMMRATAC signal tracks for one genomic interval into
* per-fragment-class bedgraph entries (short/NFR, mono-, di- and tri-nucleosomal).
*/
public class HMMRTracksToBedgraph {

private ArrayList<double[]> tracks;
private TagNode interval;
private int step;

private ArrayList<TagNode> nfr;
private ArrayList<TagNode> mono;
private ArrayList<TagNode> di;
private ArrayList<TagNode> tri;

public HMMRTracksToBedgraph(ArrayList<double[]> t, TagNode i,int s){
tracks = t;
interval = i;
step = s;
run();
}

public ArrayList<TagNode> getShort(){return nfr;}
public ArrayList<TagNode> getMono(){return mono;}
public ArrayList<TagNode> getDi(){return di;}
public ArrayList<TagNode> getTri(){return tri;}

private void run(){
// Assign the class fields here rather than creating local lists that shadow them;
// otherwise getShort()/getMono()/getDi()/getTri() would return null.
nfr = new ArrayList<TagNode>();
mono = new ArrayList<TagNode>();
di = new ArrayList<TagNode>();
tri = new ArrayList<TagNode>();

nfr.addAll(runOneCol(0));
mono.addAll(runOneCol(1));
di.addAll(runOneCol(2));
tri.addAll(runOneCol(3));

}

/**
* Convert one column of the raw track matrix (0 = short/NFR, 1 = mono,
* 2 = di, 3 = tri) into bedgraph entries across the interval.
*/
private ArrayList<TagNode> runOneCol(int c){
ArrayList<TagNode> temp = new ArrayList<TagNode>();
int start = interval.getStart();
ArrayList<PileupNode2> pile = new ArrayList<PileupNode2>();
int remainder = interval.getLength() % step;
int i;
for ( i = 0;i < tracks.size()-1;i++){
PileupNode2 pNode = new PileupNode2(start+(i*step),tracks.get(i)[c],interval.getChrom());
pile.add(pNode);
}
PileupNode2 pNode = new PileupNode2(start+(((i)*step)-remainder),tracks.get(i)[c],interval.getChrom());
pile.add(pNode);
temp.addAll(new PileupToBedGraph(pile,step).getBedGraph());
return temp;
}
}
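The only non-obvious step in runOneCol() is the final bin: every row except the last gets a fixed-step start, and the last row's start is pulled back by the interval-length remainder so that a full step ends exactly at the interval boundary. A standalone sketch of that arithmetic (not part of the commit); the example interval, step and row count are assumed values.

public class BinStartSketch {
    public static void main(String[] args) {
        int intervalStart = 1000;
        int intervalLength = 95;
        int step = 10;
        int rows = 10;                          // tracks.size() in the real code
        int remainder = intervalLength % step;  // 5

        int i;
        for (i = 0; i < rows - 1; i++) {
            // full-step bins: 1000, 1010, ..., 1080
            System.out.println(intervalStart + (i * step));
        }
        // last bin is pulled back by the remainder so its full step ends
        // exactly at the interval end: 1000 + 90 - 5 = 1085
        System.out.println(intervalStart + ((i * step) - remainder));
    }
}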
202 changes: 133 additions & 69 deletions src/HMMR_ATAC/Main_HMMR_Driver.java
@@ -65,13 +65,19 @@ public class Main_HMMR_Driver {
private static int vitWindow = 25000000;
private static File modelFile;
private static boolean stopAfterModel = false;
private static boolean printHMMRTracks = false;

private static String trainingRegions;

/*
* Version number. Change as needed
*/
private static String versionNum = "1.2.2";

@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException {

ArgParser p = new ArgParser(args);
ArgParser p = new ArgParser(args,versionNum);
bam = p.getBam();
index = p.getIndex();
genomeFile=p.getGenome();
@@ -95,6 +101,7 @@ public static void main(String[] args) throws IOException {
trainingRegions = p.getTrainingRegions();
vitWindow = p.getWindow();
modelFile = p.getModelFile();
// printHMMRTracks = p.getPrintHMMRTracks();
//For run time calculation
Long startTime = System.currentTimeMillis();

@@ -109,6 +116,16 @@ public static void main(String[] args) throws IOException {
p.printUsage();
System.exit(1);
}

/*
* Report version into log file
*
*/
log.println("Version:"+"\t"+versionNum);

/*
* Report all input arguments into log file
*/
log.println("Arguments Used:");
for (int i = 0; i < args.length-1;i+=2){
log.println(args[i]+"\t"+args[i+1]);
@@ -413,6 +430,16 @@ public static void main(String[] args) throws IOException {
* Run viterbi on the whole genome
*/
//PrintStream out = new PrintStream(output+".pileup");
PrintStream NFR = null;
PrintStream MONO = null;
PrintStream DI=null;
PrintStream TRI=null;
if(printHMMRTracks){
NFR = new PrintStream(output+"_nfr.bedgraph");
MONO = new PrintStream(output+"_mono.bedgraph");
DI = new PrintStream(output+"_di.bedgraph");
TRI = new PrintStream(output+"_tri.bedgraph");
}
ArrayList<TagNode> genomeAnnotation = new ArrayList<TagNode>();
for (int i = 0;i < vitBed.size();i++){
if (vitBed.get(i).getLength() >= 10){
@@ -421,7 +448,36 @@ public static void main(String[] args) throws IOException {
FragPileupGen vGen = new FragPileupGen(bam, index, tempBed, mode, fragMeans, fragStddevs,minMapQ);
TrackHolder vHolder = new TrackHolder(vGen.transformTracks(vGen.getAverageTracks()),trim);// 8/30/16 transformed tracks

//System.out.println(vitBed.get(i).toString()+"\t"+"viterbi bed");
if (printHMMRTracks){
HMMRTracksToBedgraph tracks = new HMMRTracksToBedgraph(vHolder.getRawData(),vitBed.get(i),10);
ArrayList<TagNode> nfr = tracks.getShort();
ArrayList<TagNode> mono = tracks.getMono();
ArrayList<TagNode> di = tracks.getDi();
ArrayList<TagNode> tri = tracks.getTri();


if (nfr != null) {
for (int w = 0; w < nfr.size(); w++) {
NFR.println(nfr.get(w).toString2());
}
}
if (mono != null) {
for (int d = 0; d < mono.size(); d++) {
MONO.println(mono.get(d).toString2());
}
}
if (di != null) {
for (int e = 0; e < di.size(); e++) {
DI.println(di.get(e).toString2());
}
}
if (tri != null) {
for (int f = 0; f < tri.size(); f++) {
TRI.println(tri.get(f).toString2());
}
}

}
vGen = null;

RobustHMM HMM = new RobustHMM(vHolder.getObs(),null,hmm,false,0,"Vector",0);
@@ -449,7 +505,7 @@ public static void main(String[] args) throws IOException {
}
}
//out.close();

// Only close the decomposed track streams if they were opened above;
// otherwise they are null and closing them would throw a NullPointerException.
if(printHMMRTracks){
NFR.close();MONO.close();DI.close();TRI.close();
}
/**
* Report the final results as peaks, bedgraphs and summits, if desired
*/
@@ -472,82 +528,90 @@ public static void main(String[] args) throws IOException {
for (String chr : hmmrBdg.keySet()){
ArrayList<TagNode> hmmr = hmmrBdg.get(chr);
ArrayList<TagNode> signal = bdg.get(chr);
Collections.sort(hmmr, TagNode.basepairComparator);
if (signal != null){
Collections.sort(signal, TagNode.basepairComparator);
}
int index = 0;
for (int i = 0; i < hmmr.size();i++){
TagNode temp = hmmr.get(i);
if (signal != null) {
Collections.sort(hmmr, TagNode.basepairComparator);

/**
* Execute the scoring commands if the state is a peak or if bedgraph scoring is on
*/
if ((int)temp.getScore2() == peak || BGScore){
boolean hasHadOverlap = false;
ArrayList<TagNode> overlaps = new ArrayList<TagNode>();
for (int a = index;a<signal.size();a++){
if (SubtractBed.overlap(temp, signal.get(a)).hasHit()){
overlaps.add(signal.get(a));
hasHadOverlap=true;
}
else{
if(hasHadOverlap){
index=a;
break;
Collections.sort(signal, TagNode.basepairComparator);

int index = 0;
for (int i = 0; i < hmmr.size(); i++) {
TagNode temp = hmmr.get(i);

/**
* Execute the scoring commands if the state is a peak or if bedgraph scoring is on
*/
if ((int) temp.getScore2() == peak || BGScore) {
boolean hasHadOverlap = false;
ArrayList<TagNode> overlaps = new ArrayList<TagNode>();
for (int a = index; a < signal.size(); a++) {
if (SubtractBed.overlap(temp, signal.get(a))
.hasHit()) {
overlaps.add(signal.get(a));
hasHadOverlap = true;
} else {
if (hasHadOverlap) {
index = a;
break;
}
}

}

}
ScoreNode scores = bedGraphMath.set(temp, overlaps);
if (scoreSys.equals("ave")){temp.setScore3(scores.getMean());}
else if (scoreSys.equals("fc")){temp.setScore3(scores.getMean()/genomeMean);}
else if (scoreSys.equals("zscore")){temp.setScore3((scores.getMean()-genomeMean)/genomeStd);}
else if (scoreSys.equals("med")){temp.setScore3(scores.getMedian());}
else{temp.setScore3(scores.getMax());}
if ((int)temp.getScore2() == peak){
temp = bedGraphMath.setSmooth(20, temp, overlaps);
temp.setID("Peak_"+counter);
if (i > 0){
temp.setUpstream(hmmr.get(i-1));
}
else{
temp.setUpstream(hmmr.get(i));
}
if (i < hmmr.size()-1){
temp.setDownstream(hmmr.get(i+1));
ScoreNode scores = bedGraphMath.set(temp, overlaps);
if (scoreSys.equals("ave")) {
temp.setScore3(scores.getMean());
} else if (scoreSys.equals("fc")) {
temp.setScore3(scores.getMean() / genomeMean);
} else if (scoreSys.equals("zscore")) {
temp.setScore3((scores.getMean() - genomeMean)
/ genomeStd);
} else if (scoreSys.equals("med")) {
temp.setScore3(scores.getMedian());
} else {
temp.setScore3(scores.getMax());
}
else{
temp.setDownstream(hmmr.get(i));
if ((int) temp.getScore2() == peak) {
temp = bedGraphMath.setSmooth(20, temp, overlaps);
temp.setID("Peak_" + counter);
if (i > 0) {
temp.setUpstream(hmmr.get(i - 1));
} else {
temp.setUpstream(hmmr.get(i));
}
if (i < hmmr.size() - 1) {
temp.setDownstream(hmmr.get(i + 1));
} else {
temp.setDownstream(hmmr.get(i));
}
counter++;
}
counter++;
}

}
/**
* report the bedgraph, if desired
*/
if (bg){
if (!BGScore){
bedgraph.println(temp.toString2());

}
else{
bedgraph.println(temp.toString_ScoredBdg());
/**
* report the bedgraph, if desired
*/
if (bg) {
if (!BGScore) {
bedgraph.println(temp.toString2());
} else {
bedgraph.println(temp.toString_ScoredBdg());
}
}
}
/**
* report the peaks and summits, if desired
*/
if (peaks && (int)temp.getScore2() == peak && temp.getLength() >= minLength){
if(temp.getSummit() != null){
summits.println(temp.getSummit().toString_ScoredSummit());
/**
* report the peaks and summits, if desired
*/
if (peaks && (int) temp.getScore2() == peak
&& temp.getLength() >= minLength) {
if (temp.getSummit() != null) {
summits.println(temp.getSummit()
.toString_ScoredSummit());
}
pks.println(temp.toString_gappedPeak());
}
pks.println(temp.toString_gappedPeak());

}


}
}

}//for loop through chroms
if (bg){
bedgraph.close();
}
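The rewritten scoring block above (the substance of this scoring fix) wraps all per-region scoring inside the signal != null check and then picks one of five summary statistics via scoreSys. A standalone sketch of just that choice, separated from the HMMR classes; the class and method names are illustrative assumptions, and the genome mean/std values are made-up inputs.

public class ScoreSketch {

    // Mirrors the scoreSys branches above: "ave", "fc", "zscore", "med", else max.
    static double score(String scoreSys, double mean, double median, double max,
                        double genomeMean, double genomeStd) {
        if (scoreSys.equals("ave")) {
            return mean;                               // raw mean coverage
        } else if (scoreSys.equals("fc")) {
            return mean / genomeMean;                  // fold change over the genome mean
        } else if (scoreSys.equals("zscore")) {
            return (mean - genomeMean) / genomeStd;    // z-scored mean coverage
        } else if (scoreSys.equals("med")) {
            return median;                             // median coverage
        } else {
            return max;                                // default: maximum coverage
        }
    }

    public static void main(String[] args) {
        // Made-up example: region mean 12.0 against genome mean 3.0 and std 2.0
        System.out.println(score("fc", 12.0, 11.0, 20.0, 3.0, 2.0));     // 4.0
        System.out.println(score("zscore", 12.0, 11.0, 20.0, 3.0, 2.0)); // 4.5
    }
}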