-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathreporter_files.py
80 lines (78 loc) · 3.82 KB
/
reporter_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import encodedcc
def _species_name(exp, connection):
    """Return the organism name reached through the experiment's first replicate.

    Follows replicate -> library -> biosample -> donor -> organism, fetching
    each linked object with ``encodedcc.get_ENCODE``.  Returns ``""`` when the
    experiment has no replicates (missing or empty list) or when any link in
    the chain is absent, so the "species" column always has a string fallback.
    """
    replicates = exp.get("replicates")
    if not replicates:
        return ""
    current = encodedcc.get_ENCODE(replicates[0], connection)
    for link in ("library", "biosample", "donor", "organism"):
        if link not in current:
            return ""
        current = encodedcc.get_ENCODE(current[link], connection)
    return current["name"]


def _experiment_fields(exp, connection):
    """Build the report columns that are identical for every file of *exp*."""
    aliases = exp.get("aliases", [])
    return {
        "experiment": exp["accession"],
        "experiment-lab": encodedcc.get_ENCODE(exp["lab"], connection)["name"],
        "biosample": exp.get("biosample_term_name", ""),
        "species": _species_name(exp, connection),
        # Only the first alias is reported; "" when there is no non-empty alias.
        "alias": aliases[0] if any(aliases) else "",
    }


def _replicate_fields(file_obj, connection):
    """Build the replicate/library/biosample columns for one file object.

    The three replicate columns default to ``""``.  ``library_aliases`` and
    ``biosample_aliases`` are only included when the file's replicate links to
    a library (and that library to a biosample); a linked object without an
    ``aliases`` key yields ``""``.
    """
    fields = {
        "biological_replicate": "",
        "technical_replicate": "",
        "replicate_id": "",
    }
    if "replicate" not in file_obj:
        return fields
    rep = encodedcc.get_ENCODE(file_obj["replicate"], connection)
    fields["biological_replicate"] = rep["biological_replicate_number"]
    fields["technical_replicate"] = rep["technical_replicate_number"]
    fields["replicate_id"] = rep["uuid"]
    if "library" in rep:
        library = encodedcc.get_ENCODE(rep["library"], connection)
        fields["library_aliases"] = library.get("aliases", "")
        if "biosample" in library:
            bio = encodedcc.get_ENCODE(library["biosample"], connection)
            fields["biosample_aliases"] = bio.get("aliases", "")
    return fields


def files(objList, fileCheckedItems, connection):
    """Print one tab-separated report row per file of each listed object.

    For every entry of ``objList`` the object itself and the File search
    results for ``/experiments/<entry>/`` are fetched with
    ``encodedcc.get_ENCODE``.  Each file found produces one line on stdout
    whose cells are ``repr()`` of the values for ``fileCheckedItems``, in
    that order.

    Args:
        objList: Iterable of identifiers; each is passed to ``get_ENCODE`` and
            substituted into the ``dataset=/experiments/{}/`` search
            (presumably experiment accessions -- confirm against the caller).
        fileCheckedItems: Column names to report.  Names that are not derived
            below are read straight from the file object (``None`` if absent).
        connection: Connection object handed through to ``encodedcc.get_ENCODE``.

    Returns:
        None.  Output is written with ``print``.
    """
    for obj in objList:
        exp = encodedcc.get_ENCODE(obj, connection)
        search = encodedcc.get_ENCODE(
            '/search/?type=File&dataset=/experiments/{}/'.format(obj), connection)
        file_ids = [hit['uuid'] for hit in search['@graph']]
        if not file_ids:
            # Nothing to print, so skip the experiment-level lookups entirely.
            continue

        # These values do not depend on the file; fetch them once per
        # experiment instead of once per file.
        exp_fields = _experiment_fields(exp, connection)

        for file_id in file_ids:
            file_obj = encodedcc.get_ENCODE(file_id, connection)

            # Start from the raw file properties; derived columns below
            # overwrite the raw value where the names collide.
            fileob = {field: file_obj.get(field) for field in fileCheckedItems}
            fileob["submitted_by"] = encodedcc.get_ENCODE(
                file_obj["submitted_by"], connection)["title"]
            fileob.update(exp_fields)

            fileob["Uniquely mapped reads number"] = ""
            if file_obj.get("file_format", "") == "bam":
                for q in file_obj.get("quality_metrics", []):
                    if "star-quality-metrics" in q:
                        star = encodedcc.get_ENCODE(q, connection)
                        fileob["Uniquely mapped reads number"] = star["Uniquely mapped reads number"]

            fileob["flowcell"] = []
            fileob["lane"] = []
            for fcd in file_obj.get("flowcell_details", []):
                fileob["flowcell"].append(fcd.get("flowcell", ""))
                fileob["lane"].append(fcd.get("lane"))

            # "platform" is only in fileob when it was requested as a column,
            # and may be None; any failure to resolve it is reported as None.
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            try:
                fileob["platform"] = encodedcc.get_ENCODE(
                    fileob["platform"], connection)["title"]
            except Exception:
                fileob["platform"] = None

            fileob.update(_replicate_fields(file_obj, connection))

            print('\t'.join(repr(fileob[j]) for j in fileCheckedItems))