Skip to content

Commit

Permalink
Merge pull request #14 from Marmzy/13-convert-r-to-python
Browse files Browse the repository at this point in the history
13 convert r to python
  • Loading branch information
Marmzy authored Dec 7, 2022
2 parents 9cbee50 + ba590ad commit ea83079
Show file tree
Hide file tree
Showing 8 changed files with 268 additions and 268 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

### Description

Rank Ordering of Super-Enhancers aka ROSE is a tool for identifying super-enhancers. It does this by separating super-enhancers from typical enhancers using sequencing data (.bam) given a file of previously identified constituent enhancers (.gff). The original ROSE tool was developed by Charles Y. Lin, David A. Orlando and Brian J. Abraham at Young Lab Whitehead Institute/MIT. This new ROSE version is an attempt to update the code from Python 2 to 3, use newer versions of tools, make the code more readable to allow for better in-depth understanding of the algorithm and to increase the computational speed.
Rank Ordering of Super-Enhancers aka ROSE is a tool for identifying super-enhancers. It does this by separating super-enhancers from typical enhancers using sequencing data (.bam) given a file of previously identified constituent enhancers (.gff). The original ROSE tool was developed by Charles Y. Lin, David A. Orlando and Brian J. Abraham at Young Lab Whitehead Institute/MIT. This new ROSE version is an attempt to update the code from Python 2 to 3, convert the remaining R code to Python, use newer versions of tools, make the code more readable to allow for better in-depth understanding of the algorithm and to increase the computational speed.

This version of ROSE was developed using `Python 3.8.10`, `R 4.2.1`, and `SAMtools 1.10`
This version of ROSE was developed using `Python 3.8.10` and `SAMtools 1.10`

---

Expand Down
7 changes: 7 additions & 0 deletions ROSE.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,11 @@ if [ "$VALUE_C" ]; then
Rscript src/ROSE_callSuper.R -o ${PWD}/${VALUE_O} -d $DENSITY -g $VALUE_I -c $VALUE_C
else
Rscript src/ROSE_callSuper.R -o ${PWD}/${VALUE_O} -d $DENSITY -g $VALUE_I
fi

#Identifying and visualising superenhancers
#Expansions are quoted so that paths containing spaces or glob characters
#reach the Python script as single, unmodified arguments
if [ "$VALUE_C" ]; then
    python3 src/ROSE_callSuper.py -o "${PWD}/${VALUE_O}" -d "$DENSITY" -g "$VALUE_I" -c "$VALUE_C"
else
    python3 src/ROSE_callSuper.py -o "${PWD}/${VALUE_O}" -d "$DENSITY" -g "$VALUE_I"
fi
83 changes: 0 additions & 83 deletions src/ROSE_callSuper.R

This file was deleted.

90 changes: 90 additions & 0 deletions src/ROSE_callSuper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python

import argparse
import numpy as np
import pandas as pd

from pathlib import Path
from scipy.stats import rankdata
from superenhancers.output import convert_stitched_to_bed, hockey_stick_plot, write_enhancer_table
from superenhancers.super_enhancer import calculate_cutoff
from utils.file_helper import check_file, check_path


def parseArgs() -> argparse.Namespace:
    """Parse arguments from CLI

    Reads the options from the command line, prints the parsed namespace to
    stdout and returns it.

    Returns:
        argparse.Namespace: Argparse space containing parsed arguments

    Raises:
        SystemExit: Raised by argparse when a required option (-o, -d, -g) is
            missing or the command line cannot be parsed
    """

    parser = argparse.ArgumentParser(description="")

    #Required arguments (enforced by argparse so a missing option fails here
    #with a usage message instead of surfacing later as a None path)
    parser.add_argument("-o", "--output", type=str, required=True, help="Output directory name")
    parser.add_argument("-d", "--density", type=str, required=True, help="Stitched enhancer loci signal density file")
    parser.add_argument("-g", "--gff", type=str, required=True, help="File (.bed, .gff or .gtf) containing binding sites to make enhancers")

    #Optional arguments
    #nargs="?" lets "-c" be given without a value, in which case it stays None
    parser.add_argument("-c", "--control", type=str, nargs="?", help="Control (.bam) file")

    #Printing arguments to the command line
    args = parser.parse_args()

    print("Called with args:")
    print(f"{args}\n")

    return args


def main() -> None:
    """Rank stitched enhancer loci by density signal and write the results

    Reads the tab-separated density file given by ``--density``, builds the
    ranking signal from its 7th column (minus the 8th column when a control
    is supplied), floors negative values at 0, obtains a cut-off from
    ``calculate_cutoff`` and flags every row whose signal exceeds it as a
    super-enhancer. The plot, .bed file and the two enhancer tables are then
    produced by the imported ``superenhancers.output`` helpers.
    """

    #Parse arguments from the command line
    args = parseArgs()

    #Read stitched enhancer loci density signal file as dataframe
    stitched_regions = pd.read_csv(check_file(args.density), sep="\t")

    #Names reused for the output header, plot and file names
    rank_column = stitched_regions.columns[6]
    gff_path = Path(args.gff)
    gff_stem = gff_path.stem

    #Subtract control signal if control is available
    rankBy_vector = stitched_regions.iloc[:, 6]
    if args.control:
        rankBy_vector = rankBy_vector - stitched_regions.iloc[:, 7]

    #Setting negative values to 0
    #clip returns a new Series, so the values in stitched_regions (which are
    #written to the output tables below) are never overwritten
    rankBy_vector = rankBy_vector.clip(lower=0)

    #Calculate the superenhancer density signal cut-off value
    #A copy is handed over so the helper cannot alter the ranking signal
    y_cutoff = calculate_cutoff(np.asarray(rankBy_vector.copy()))

    #Get superenhancers based on their density signal
    is_super = np.asarray(rankBy_vector > y_cutoff)
    superEnhancerRows = np.where(is_super)[0]

    #Create output file header
    enhancerDescription = f"{gff_path.name} Enhancers\nCreated from {Path(args.density).name}"
    enhancerDescription += f"\nRanked by {rank_column}\nUsing cutoff of {y_cutoff} for Super-Enhancers"

    #Creating hockey stick plot
    hockey_stick_plot(np.asarray(rankBy_vector), y_cutoff, superEnhancerRows, args.output, args.gff, args.control, rank_column)

    #Rank stitched enhancer loci by control corrected density signal and output to .bed file
    bedName = str(Path(args.output, f"{gff_stem}_enhancers_withSuper.bed"))
    convert_stitched_to_bed(stitched_regions, enhancerDescription, bedName, np.asarray(rankBy_vector), superEnhancerRows)

    #Calculate stitched enhancer loci rankings and super status
    #Rank 1 is the highest signal; "ordinal" gives tied values distinct ranks
    enhancer_rank = len(stitched_regions)-rankdata(rankBy_vector, method="ordinal")+1
    #The boolean mask already holds the per-row status, no per-row lookup needed
    super_status = is_super.astype(int)
    additional_data = pd.DataFrame({"enhancerRank": enhancer_rank, "isSuper": super_status})

    #Output rankings and status dataframe
    enhancer_file = check_path(Path(args.output, f"{gff_stem}_AllEnhancers.table.txt"))
    write_enhancer_table(stitched_regions, enhancerDescription, enhancer_file, additional_data)

    super_file = check_path(Path(args.output, f"{gff_stem}_SuperEnhancers.table.txt"))
    write_enhancer_table(stitched_regions.iloc[superEnhancerRows, :], enhancerDescription, super_file, additional_data.iloc[superEnhancerRows, :])



#Run the pipeline only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()
140 changes: 0 additions & 140 deletions src/superenhancers/output.R

This file was deleted.

Loading

0 comments on commit ea83079

Please sign in to comment.