-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added code for streamlining feature extraction.
Added pre-compiled model of CoRE-ATAC.
- Loading branch information
ajt986
committed
Feb 3, 2020
1 parent
ee2e28e
commit f4d3b06
Showing
28 changed files
with
5,739,635 additions
and
37 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#!/bin/bash
#
# Extracts PEAS peak features for one sample and merges them into
# <prefix>_features.txt inside <outDir>/peak_features.
#
# Usage:
#   <script> <inDir> <prefix> <outDir> <fasta> <homerref> <homermotifs> \
#            <conservation> <ctcfmotifs> <inputpeaks> <jardir>
#
#   inDir         directory containing <prefix>.bam
#   prefix        sample name; used for the input BAM and every output file
#   outDir        existing output directory; peak_features/ and denovo/ are
#                 created beneath it
#   fasta         genome argument given to findMotifsGenome.pl
#   homerref      genome argument given to annotatePeaks.pl
#   homermotifs   motif file for the known-motif annotatePeaks.pl run
#   conservation  file given to the PEASTools "conservation" command
#   ctcfmotifs    motif file for the CTCF annotatePeaks.pl run
#   inputpeaks    peak file annotated by every step
#   jardir        directory containing PEASTools.jar
#
# Requires on PATH: samtools, java, annotatePeaks.pl, findMotifsGenome.pl.

set -u

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print $1 unchanged, unless it is a relative path that exists right now,
# in which case it is anchored at the current directory. The script changes
# directory below, which would otherwise invalidate relative arguments.
abs_existing() {
  if [[ "$1" != /* && -e "$1" ]]; then
    printf '%s/%s\n' "$PWD" "$1"
  else
    printf '%s\n' "$1"
  fi
}

if (( $# < 10 )); then
  printf 'Usage: %s <inDir> <prefix> <outDir> <fasta> <homerref> <homermotifs> <conservation> <ctcfmotifs> <inputpeaks> <jardir>\n' \
    "${0##*/}" >&2
  exit 2
fi

for arg in "${@:1:10}"; do
  [[ -n "$arg" ]] || die "arguments must not be empty"
done

inDir=$(abs_existing "$1")
prefix=$2
outDir=$3
# fasta and homerref are forwarded untouched: the visible code does not show
# whether they are file paths or tool-specific genome identifiers.
fasta=$4
homerref=$5
homermotifs=$(abs_existing "$6")
conservation=$(abs_existing "$7")
ctcfmotifs=$(abs_existing "$8")
inputpeaks=$(abs_existing "$9")
jarpath="$(abs_existing "${10}")/"

cd "${outDir}" || die "cannot cd to output directory: ${outDir}"
# A relative outDir would no longer resolve once we are inside it.
if [[ "${outDir}" != /* ]]; then
  outDir=$PWD
fi

# -p so that re-running the pipeline in the same output directory works.
mkdir -p peak_features || die "cannot create ${outDir}/peak_features"
cd peak_features || die "cannot cd to ${outDir}/peak_features"

##Start with peaks and extract features.
##Sort BAM #TODO add option to skip this step if already sorted
echo "--- Sorting bam file. ---"
samtools sort -T "PEASEXTRACT_${prefix}" -o "${prefix}_sorted.bam" "${inDir}/${prefix}.bam" \
  || die "samtools sort failed for ${inDir}/${prefix}.bam"
samtools index "${prefix}_sorted.bam" \
  || die "samtools index failed for ${prefix}_sorted.bam"

echo "--- Calling annotations & known motifs. ---"
#HOMER Annotations
annotatePeaks.pl "${inputpeaks}" "${homerref}" -m "${homermotifs}" -nmotifs > "${prefix}_peaks_annotated.bed"

#call denovo motifs
echo "--- Calling denovo motifs. ---"
findMotifsGenome.pl "${inputpeaks}" "${fasta}" "${outDir}/denovo"

mkdir -p "${outDir}/denovo/merge" || die "cannot create ${outDir}/denovo/merge"
cp "${outDir}/denovo/homerResults/"*.motif "${outDir}/denovo/merge"
# -f: it is not an error when no ".similar" / "RV" motif files are present.
rm -f "${outDir}/denovo/merge/"*.similar*
rm -f "${outDir}/denovo/merge/"*RV.motif
# Overwrite rather than append so a re-run does not duplicate motifs.
# merged.motifs ends in ".motifs", so the *.motif glob never matches it.
cat "${outDir}/denovo/merge/"*.motif > "${outDir}/denovo/merge/merged.motifs"
#call motifs with homer again using denovo motifs file homerMotifs.all.motifs
annotatePeaks.pl "${inputpeaks}" "${homerref}" -m "${outDir}/denovo/merge/merged.motifs" -nmotifs > "${prefix}_peaks_denovo.bed"

echo "--- Calling CTCF motifs. ---"
annotatePeaks.pl "${inputpeaks}" "${homerref}" -m "${ctcfmotifs}" -nmotifs > "${prefix}_peaks_ctcf.bed"

#Get the insert size threshold to remove outlier inserts
echo "--- Getting insert size threshold. ---"
java -jar "${jarpath}PEASTools.jar" insertsizethresh "${prefix}_sorted.bam" "${outDir}/peak_features" --keepduplicates #!!!TODO!!!#
# thresh.txt is read from the current directory (peak_features).
# NOTE(review): presumably written by the insertsizethresh step above - confirm.
[[ -r thresh.txt ]] || die "thresh.txt not found in ${outDir}/peak_features"
thresh=$(<thresh.txt)

#Get Insert features
echo "--- Getting insert features. ---"
# Start from an empty file so a re-run does not append to stale results.
: > "${prefix}_insertmetrics.txt"
# NOTE(review): the per-chromosome chrN.bam inputs are not created by any
# command visible in this script; presumably the insertsizethresh step writes
# them - confirm. Only chr1..chr22 are processed.
for i in {1..22}; do
  chr="chr${i}"
  java -jar "${jarpath}PEASTools.jar" insertmetrics "${chr}" "${chr}.bam" "${inputpeaks}" "${prefix}_${chr}_insertmetrics.txt" "${thresh}" --keepduplicates #!!!TODO!!!#
  rm -f -- "${chr}.bam"
  cat "${prefix}_${chr}_insertmetrics.txt" >> "${prefix}_insertmetrics.txt"
  rm -f -- "${prefix}_${chr}_insertmetrics.txt"
done

echo "--- Getting conservation scores. ---"
#Get Conservation Scores
java -jar "${jarpath}PEASTools.jar" conservation "${inputpeaks}" "${conservation}" "${prefix}_conservation.txt"

echo "--- Merging features. ---"
java -jar "${jarpath}PEASTools.jar" mergedl "${inputpeaks}" "${prefix}_peaks_annotated.bed" "${prefix}_insertmetrics.txt" "${prefix}_conservation.txt" "${prefix}_peaks_denovo.bed" "${prefix}_peaks_ctcf.bed" "${prefix}_features.txt" "MERGED"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#!/bin/bash
#
# Feature-extraction driver for one BAM / peak-file pair.
#
# Usage:
#   <script> <bamfile> <peakfile> <outdir> <fasta> <sepfasta> <path>
#
#   bamfile   input BAM; its file name minus a trailing ".bam" becomes the
#             output prefix, and its directory is passed on as the BAM dir
#   peakfile  peak file given to PeakFormatter.py
#   outdir    output directory (created if missing)
#   fasta     forwarded unchanged to DeepLearningPEAS.sh
#   sepfasta  forwarded to PeakFormatter.py and BAMExtractor.jar
#   path      directory containing PeakFormatter.py, BAMExtractor.jar,
#             DeepLearningPEAS.sh and the PEAS/ resource directory
#
# Requires on PATH: python, java.

set -u

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print $1 unchanged, unless it is a relative path that exists right now,
# in which case it is anchored at the current directory. The script changes
# into the output directory below, which would otherwise invalidate
# relative arguments.
abs_existing() {
  if [[ "$1" != /* && -e "$1" ]]; then
    printf '%s/%s\n' "$PWD" "$1"
  else
    printf '%s\n' "$1"
  fi
}

if (( $# < 6 )); then
  printf 'Usage: %s <bamfile> <peakfile> <outdir> <fasta> <sepfasta> <path>\n' \
    "${0##*/}" >&2
  exit 2
fi

for arg in "${@:1:6}"; do
  [[ -n "$arg" ]] || die "arguments must not be empty"
done

bamfile=$(abs_existing "$1")
peakfile=$(abs_existing "$2")
outdir=$3
# fasta is forwarded untouched: the visible code does not show whether it is
# a file path or a tool-specific genome identifier.
fasta=$4
sepfasta=$(abs_existing "$5")
path=$(abs_existing "$6")
path="${path%/}/"

# Strip only a trailing ".bam". The previous sed 's/.bam//g' treated "." as
# a wildcard and removed every match, so "sample_bam1.bam" became "sample1".
prefix=${bamfile##*/}
prefix=${prefix%.bam}
inDir=$(dirname -- "${bamfile}")

readonly INSERTTHRESH=900
readonly CHARPERLINE=50
readonly HOMERREF="hg19"
readonly HOMERMOTIFS="${path}PEAS/humantop_Nov2016_HOMER.motifs"
readonly CONSERVATION="${path}PEAS/phastCons46wayPlacental.bed"
readonly CTCFMOTIFS="${path}PEAS/CTCF.motifs"

# The original ran cd "${outDir}", but the variable is spelled "outdir", so
# the working directory was never the output directory.
mkdir -p -- "${outdir}" || die "cannot create output directory: ${outdir}"
cd -- "${outdir}" || die "cannot cd to output directory: ${outdir}"
# Absolute, with exactly one trailing slash: outdir is concatenated directly
# with file names below and is also handed to the downstream tools.
outdir="${PWD%/}/"
reformattedpeaks="${outdir}${prefix}_peaks.txt"

#Step 1: Reformat peaks for 600 bp windows
python "${path}PeakFormatter.py" "${peakfile}" "${sepfasta}" "${reformattedpeaks}"

#Step 2: Extract features from BAM
java -jar "${path}BAMExtractor.jar" "${bamfile}" "${reformattedpeaks}" "${sepfasta}" "${CHARPERLINE}" "${outdir}" "${prefix}" "${INSERTTHRESH}"

#Step 3: Extract PEAS features
# NOTE(review): "${reformattedpeaks}_original.txt" is presumably written by
# PeakFormatter.py in step 1 - confirm.
"${path}DeepLearningPEAS.sh" "${inDir}" "${prefix}" "${outdir}" "${fasta}" "${HOMERREF}" "${HOMERMOTIFS}" "${CONSERVATION}" "${CTCFMOTIFS}" "${reformattedpeaks}_original.txt" "${path}PEAS"

#Step 4: Move PEAS features to directory (currently disabled)
#mv "${outdir}peak_features/${prefix}_features.txt" "${outdir}"

#Done!
|
Oops, something went wrong.