-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCELE_gff.py
37 lines (33 loc) · 961 Bytes
/
CELE_gff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def gff_key(path, keyword):
    """Collect the coordinates of every `keyword` feature, grouped by sequence.

    The file at `path` is read as tab-separated text.  Column 1 is used as
    the sequence identifier, column 3 as the feature type, and columns 4
    and 5 as the start and end coordinates (the GFF column layout).

    Args:
        path: Path of the annotation file to read.
        keyword: Feature type to keep, compared for exact equality with
            column 3 (for example ``"exon"``).

    Returns:
        A dict mapping each sequence identifier seen on a data row to a
        list of ``(start, end)`` tuples, in file order.  Coordinates are
        returned as the strings found in the file.  A sequence that has
        data rows but no `keyword` feature maps to an empty list.
    """
    anno = {}
    with open(path) as f:
        # Stream the file instead of loading every line into memory first.
        for raw in f:
            if raw.startswith('#'):
                continue
            fields = raw.rstrip('\r\n').split('\t')
            # Blank lines and truncated rows lack the type/start/end
            # columns; skip them rather than failing with IndexError.
            if len(fields) < 5:
                continue
            # Register the sequence even when this row is another feature
            # type, so every sequence in the file has an entry.
            coords = anno.setdefault(fields[0], [])
            if fields[2] == keyword:
                coords.append((fields[3], fields[4]))
    return anno
def _window_totals(features, length):
    """Sum ``end - start`` of each feature into the window holding its end."""
    totals = []
    for feature in features:
        start, end = int(feature[0]), int(feature[1])
        # Window n holds end coordinates in (n * length, (n + 1) * length],
        # matching the `end <= bound` comparison of the original loop.
        idx = max((end - 1) // length, 0)
        if idx >= len(totals):
            # Zero-fill any windows skipped over so positions stay aligned.
            totals.extend([0] * (idx + 1 - len(totals)))
        # NOTE(review): GFF coordinates are 1-based inclusive, so the true
        # feature length would be end - start + 1; the original measure is
        # kept here -- confirm which one is wanted.
        totals[idx] += end - start
    return totals


def exon_freq(anno, length):
    """Total feature length per fixed-size window, for each sequence.

    Every feature is assigned to the window that contains its end
    coordinate and contributes ``end - start`` to that window's total.
    Windows without features are reported as 0, and the list for a
    sequence stops at the last window that contains a feature.  Features
    do not need to be sorted.

    Args:
        anno: Dict mapping a sequence identifier to a list of
            ``(start, end)`` pairs; coordinates may be ints or numeric
            strings (they are passed through ``int``).
        length: Window size, in the same units as the coordinates.

    Returns:
        A dict mapping the labels ``'I', 'II', 'III', 'IV', 'V', 'X',
        'MtDNA'`` to the list of per-window totals.  Labels are assigned
        by position, following the iteration order of `anno`; the
        identifiers used as keys of `anno` are not consulted.

    Raises:
        ValueError: If `length` is not positive.
        IndexError: If `anno` holds more sequences than there are labels.
    """
    key = ['I', 'II', 'III', 'IV', 'V', 'X', 'MtDNA']
    if length <= 0:
        raise ValueError("length must be a positive window size")
    if len(anno) > len(key):
        # Fail before doing any work, and say why.
        raise IndexError(
            "anno has %d sequences but only %d labels are defined"
            % (len(anno), len(key))
        )
    res = {}
    for label, features in zip(key, anno.values()):
        res[label] = _window_totals(features, length)
    return res