-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathp_rawsegs.py
69 lines (55 loc) · 1.86 KB
/
p_rawsegs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Collate ASCAT data from all challenge patients into all_lesions.txt;
# optimized for use of ncpus CPUs.
# (Change ncpus to match the machine, e.g. to run on a Mac.)
# Assumes the existence of the directories run1, run2, ... run<ncpus>.
# DEBUG ncpus = 6
ncpus = 6
import os.path
import p_rawsegs_module # routine to collate one ascat rawsegs file
# Read one sample name per line from the file "samples", stripping trailing
# whitespace (including the newline).  The with-block closes the file handle
# as soon as reading is finished instead of leaving it to the interpreter.
with open("samples", "r") as samplefile:
    samples = [line.rstrip() for line in samplefile]
# group the sample names by patient: the patient id is the part of the
# sample name before the first "_", and each id maps to its list of samples
pats = {}
for sample in samples:
    pid = sample.split("_")[0]
    pats.setdefault(pid, []).append(sample)
# REMOVE all patients with fewer than 3 samples.
# Entries are deleted inside the loop, so walk an explicit snapshot of the
# keys; iterating the live key view while the dict shrinks is an error on
# interpreters where keys() is not a list copy.
for key in list(pats.keys()):
    nsamples = len(pats[key])
    if nsamples < 3:
        print("Deleting patient %s due to having only %s samples"
              % (key, nsamples))
        del pats[key]
# Sorted patient ids give a fixed order for slicing patients across the
# run directories.
pids = sorted(pats.keys())
npat = len(pids)
if npat < ncpus:
    print("ERROR: trying to divide %s runs across %s processors"
          % (npat, ncpus))
    # Stop with a failure status: the bare exit() helper reports success (0)
    # and is only present when the site module has been loaded.
    raise SystemExit(1)
# Number of patients assigned to each run directory.
# NOTE(review): presumably this must match the split used when the run
# directories were populated -- confirm before changing the formula.
nperdir = int(float(npat)/ncpus) + 1  # +1 handles the remainder
startpat = 0
# Clamp the first slice to the patient count: with ncpus == 1, nperdir is
# npat + 1, which would otherwise index one past the end of pids.
endpat = min(nperdir, npat)
# open the combined output file and write its tab-separated header row
outfile = open("all_lesions.txt", "w")
outfile.write(
    "patient\tbiopsy\tchrom\tsegstart\tsegend\t"
    "rawA\trawB\tintA\tintB\n"
)
# build the probe location dictionary a single time up front so the same
# object can be handed to every collate call (done just once for speed!)
markerlocations = p_rawsegs_module.makeprobedict()
# Collate data from each rawsegs file into "outfile".  Directory run<n> is
# paired with the n-th consecutive slice pids[startpat:endpat] of patients.
# The try/finally guarantees the output file is closed (and flushed) even
# when a collate call or a malformed sample name raises part-way through.
try:
    for n in range(1, ncpus + 1):
        dirname = "run" + str(n) + "/"
        # locate each rawsegs file; slicing (rather than indexing) simply
        # yields fewer patients if endpat reaches past the end of pids
        for mypid in pids[startpat:endpat]:
            for sample in pats[mypid]:
                # sample names are expected to start with "<pid>_<sid>"
                pid, sid = sample.split("_")[0:2]
                print("collating %s %s %s" % (dirname, pid, sid))
                goodresult = p_rawsegs_module.collate(dirname, pid, sid,
                                                      outfile,
                                                      markerlocations)
                # a falsy return from collate is reported as a failure
                if not goodresult:
                    print("P-ASCAT failed on %s %s" % (pid, sid))
        # advance to the next slice, never running past the last patient
        startpat = endpat
        endpat = min(endpat + nperdir, npat)
finally:
    outfile.close()