-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcount_ploidy_evidence.py
94 lines (83 loc) · 3.29 KB
/
count_ploidy_evidence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 26 11:54:57 2018
@author: lpsmith
"""
from __future__ import division
from os import walk
from os import path
from os import readlink
from os import mkdir
from os.path import isfile
from copy import deepcopy
import lucianSNPLibrary as lsl
#Use this value to set up whether to use the 'rejoined' segments or not
evidence_file = "calling_evidence.tsv"
outfile= "evidence_counts.tsv"
onlysomepatients = False
somepatients = ["572"]
def readEvidence():
evidence = {}
fcf = open(evidence_file, "r")
for line in fcf:
if "Patient" in line:
continue
(patient, sample, tomcall, VAFcat, flowratio, close_dip, close_tet, better_acc, acc_diff, NYGC_closer, goodness, Xcloser, finalcall) = line.rstrip().split("\t")
if patient not in evidence:
evidence[patient] = {}
evidence[patient][sample] = {}
evidence[patient][sample]["Tom's Partek Call"] = tomcall
evidence[patient][sample]["2N VAF histogram category"] = VAFcat
evidence[patient][sample]["Close diploid flow?"] = close_dip
evidence[patient][sample]["Close tetraploid flow?"] = close_tet
evidence[patient][sample]["Better accuracy"] = better_acc
evidence[patient][sample]["NYGC closer to:"] = NYGC_closer
evidence[patient][sample]["Goodness diff"] = goodness
evidence[patient][sample]["Xiaohong closer"] = Xcloser
evidence[patient][sample]["final"] = finalcall
return evidence
def countCategories(evidence):
counts = {}
for patient in evidence:
for sample in evidence[patient]:
finalcall = evidence[patient][sample]["final"]
if finalcall not in ("Diploid", "Tetraploid"):
continue
for category in evidence[patient][sample]:
if category == "final":
continue
if category not in counts:
counts[category] = {}
call = evidence[patient][sample][category]
if call not in counts[category]:
counts[category][call] = {}
counts[category][call]["Diploid"] = 0
counts[category][call]["Tetraploid"] = 0
counts[category][call][finalcall] += 1
return counts
def writeCounts(counts):
outcounts = open(outfile, "w")
outcounts.write("Evidence Category")
outcounts.write("\tEvidence call")
outcounts.write("\tnum_dip")
outcounts.write("\tdip_running_total")
outcounts.write("\tnum_tet")
outcounts.write("\ttet_running_total")
outcounts.write("\n")
for category in counts:
dip_total = 0
tet_total = 0
for call in counts[category]:
outcounts.write(category)
outcounts.write("\t" + call)
outcounts.write("\t" + str(counts[category][call]["Diploid"]))
dip_total += counts[category][call]["Diploid"]
outcounts.write("\t" + str(dip_total))
outcounts.write("\t" + str(counts[category][call]["Tetraploid"]))
tet_total += counts[category][call]["Tetraploid"]
outcounts.write("\t" + str(tet_total))
outcounts.write("\n")
evidence = readEvidence()
counts = countCategories(evidence)
writeCounts(counts)