-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrph.py
49 lines (42 loc) · 1.2 KB
/
grph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import sys
def load_data(infile):
    """Read a FASTA-style file into a dict mapping record name to sequence.

    The file is decoded as ISO-8859-1. The first line is always taken as a
    header line; after that, every line starting with '>' begins a new
    record. A record's name is its header line without the first character
    and without trailing whitespace. All other lines have trailing
    whitespace removed and are concatenated to form the record's sequence.
    If a name occurs more than once, the last record with that name wins.

    Args:
        infile: Path of the file to read.

    Returns:
        dict of name -> sequence, in file order. An empty file yields an
        empty dict.
    """
    genes = dict()
    with open(infile, 'r', encoding='ISO-8859-1') as f:
        header = f.readline()
        if not header:
            # Empty file: there is no record at all, so do not invent an
            # entry with an empty name and an empty sequence.
            return genes
        gene_name = header[1:].rstrip()
        # Collect sequence pieces and join once per record instead of
        # growing a string with repeated concatenation.
        chunks = []
        for line in f:
            if line.startswith('>'):
                genes[gene_name] = ''.join(chunks)
                gene_name = line[1:].rstrip()
                chunks = []
            else:
                chunks.append(line.rstrip())
        # Flush the final record, which has no following header line.
        genes[gene_name] = ''.join(chunks)
    return genes
def create_adjacency_list(genes, k=3):
    """Build the directed overlap edges between named sequences.

    An edge "A B" is produced when the last ``k`` characters of sequence A
    equal the first ``k`` characters of sequence B and A and B are different
    entries. Sequences shorter than ``k`` take part in no edge: slicing a
    short string silently returns fewer than ``k`` characters, which would
    otherwise link short (or empty) sequences to each other.

    Args:
        genes: Mapping of name -> sequence string.
        k: Length of the suffix/prefix overlap to match. Defaults to 3.

    Returns:
        list of "name_i name_j" strings, ordered by the position of name_i
        in ``genes`` and then by the position of name_j in ``genes``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # seq[-0:] would be the whole string, so a zero overlap is rejected
        # rather than silently meaning something else.
        raise ValueError("k must be a positive integer")

    # Index names by their k-length prefix so each suffix needs one lookup
    # instead of a scan over every other sequence. Lists keep dict order.
    names_by_prefix = {}
    for name, sequence in genes.items():
        if len(sequence) >= k:
            names_by_prefix.setdefault(sequence[:k], []).append(name)

    adjacency_list = []
    for name, sequence in genes.items():
        if len(sequence) < k:
            continue
        for other in names_by_prefix.get(sequence[-k:], ()):
            if other != name:  # no self-loops
                adjacency_list.append(str(name) + ' ' + str(other))
    return adjacency_list
def write_data(outfile):
    """Replace ``outfile`` with a fresh, empty file.

    Any existing file at that path is deleted first, then the path is
    opened in append mode and closed again without writing anything.
    Marked as unused by the original author.
    """
    # not used
    already_there = os.path.exists(outfile)
    if already_there:
        os.remove(outfile)
    with open(outfile, 'a+'):
        # Nothing is written; opening is enough to create the file.
        pass
def main(argv):
    """Load sequences from the file named in ``argv[0]`` and print the edges.

    Args:
        argv: Command-line arguments without the program name; the first
            element is the path passed to ``load_data``.

    Exits with a usage message (non-zero status) when no path is given,
    instead of failing with an IndexError traceback.
    """
    if not argv:
        sys.exit("usage: grph.py <fasta_file>")
    genes = load_data(argv[0])
    # One "name_i name_j" edge per output line.
    for edge in create_adjacency_list(genes):
        print(edge)
# Script entry point: run main() with the command-line arguments, leaving out
# the program name, only when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])