Skip to content

Commit

Permalink
update peak call format to ENCODE MPRA/STARR-seq BED6+5 common file s…
Browse files Browse the repository at this point in the history
…pecification
  • Loading branch information
Donghoon Lee committed Aug 12, 2021
1 parent efb0287 commit 0efa023
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions starrpeaker/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,19 +745,23 @@ def call_peak(prefix, bedFile, bctFile, chromSize, bwFile, covFile=None, thresho

### merge peak
print("[%s] Merge peaks" % (timestamp()))
pybedtools.BedTool(prefix + ".peak.centered.bed").merge(c=[4, 6, 7, 8], o=["max", "max", "max", "max"]).saveas(prefix + ".peak.centered.merged.bed")
# pybedtools.BedTool(prefix + ".peak.centered.bed").merge(c=[4, 6, 7, 8], o=["max", "max", "max", "max"]).saveas(prefix + ".peak.centered.merged.bed")
pybedtools.BedTool(prefix + ".peak.centered.bed").merge(c=[4, 5, 6, 7, 8], o=["max", "max", "max", "max", "max"]).saveas(prefix + ".peak.centered.merged.bed")

### finalize peak
print("[%s] Finalize peaks" % (timestamp()))
peak = pd.read_csv(prefix + ".peak.centered.merged.bed", sep='\t', header=None)
peak.columns = ['chr', 'start', 'end', 'fc', 'cov', 'nlog10pval', 'nlog10qval']
# peak.columns = ['chr', 'start', 'end', 'fc', 'cov', 'nlog10pval', 'nlog10qval']
peak.columns = ['chr', 'start', 'end', 'fc', 'icov', 'ocov', 'nlog10pval', 'nlog10qval']
peak['log2fc'] = np.log2(peak['fc'])
peak['idx'] = peak.index
peak['strand'] = "."
peak['score'] = [min(int(round(x)), 1000) for x in peak['fc'] * 100]
peak = peak.sort_values(by=['fc'], ascending=False)
peak['name'] = ['peak_' + str(x) for x in peak.reset_index().index + 1]
peak = peak.sort_values(by=['idx'])
final = peak[['chr', 'start', 'end', 'name', 'score', 'strand', 'fc', 'cov', 'nlog10pval', 'nlog10qval']]
# final = peak[['chr', 'start', 'end', 'name', 'score', 'strand', 'fc', 'cov', 'nlog10pval', 'nlog10qval']]
final = peak[['chr', 'start', 'end', 'name', 'score', 'strand', 'log2fc', 'icov', 'ocov', 'nlog10pval', 'nlog10qval']]
final.to_csv(prefix + '.peak.final.bed', sep='\t', float_format='%.3f', index=False, header=False)

### remove intermediate peak files
Expand Down

0 comments on commit 0efa023

Please sign in to comment.