-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsvtofasta.py
executable file
·40 lines (34 loc) · 1.3 KB
/
csvtofasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python3
"""
Read a CSV file (taxa.csv by default) and index it by its taxid column.
Write the 16s_seq column out as a FASTA file for downstream processing.
"""
import click
import numpy as np
import pandas as pd
@click.command()
@click.option('--input', '-i', 'input_',
              required=True, type=str, default="taxa.csv")
@click.option('--output', '-o', 'output_',
              required=True, type=str, default="16s_seq.fna")
def csvtofasta(input_, output_):
    """
    Convert a taxa CSV file into a FASTA file for downstream processing.

    Only the "taxid" and "16s_seq" columns of the input CSV are read.
    Each row becomes one FASTA record whose header is ">taxid_<taxid>_"
    and whose next line is the sequence. Rows without a sequence are
    skipped. The output file is overwritten if it already exists.
    """
    idfield = "taxid"
    seqfield = "16s_seq"

    # select the two needed columns by name rather than by position, so
    # the leading numbered index column and any other columns are ignored
    # regardless of how many columns the csv has or what order they are in
    print(f"reading from {input_}")
    csv = pd.read_csv(input_, usecols=[idfield, seqfield])
    csv.set_index(idfield, inplace=True)

    # pandas reads an empty cell as NaN, which would otherwise be written
    # out as the literal sequence "nan"; drop those rows and say so
    seqs = csv[seqfield]
    missing = seqs.isna()
    if missing.any():
        print(f"skipping {int(missing.sum())} record(s) with no {seqfield}")
        seqs = seqs[~missing]

    # note: use the trailing underscore below for parsing downstream
    # files with sed or other string matching to prevent partial
    # matches from replacing, e.g. 1234 matching 12345
    # useful for subbing in full tax or other info
    print(f"saving to {output_}")
    with open(output_, "w") as outf:
        outf.writelines(
            f">taxid_{taxid}_\n{seq}\n" for taxid, seq in seqs.items()
        )
if __name__ == "__main__":
    # script entry point: invoke the click command with no explicit arguments
    csvtofasta()