generated from Leslie-C/QAA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDemuxPt1.py
executable file
·54 lines (47 loc) · 1.78 KB
/
DemuxPt1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import Bioinfo
import gzip
import argparse
def get_args():
parser = argparse.ArgumentParser(
description="Plots median quality scores for a given fastq file"
)
parser.add_argument("-l", "--length", help="length of reads being plotted", required=True, type = int)
parser.add_argument("-f", "--file", help="input filename", required=True)
parser.add_argument("-o", "--output", help="output filename", required=True)
return parser.parse_args()
args = get_args()
def init_list(lst: list, value: float=0.0) -> list:
'''This function takes an empty list and will populate it with
the value passed in "value". If no value is passed, initializes list
with 101 values of 0.0.'''
for i in range(args.length):
lst.append(value)
return lst
my_list: list = []
my_list = init_list(my_list)
file = args.file
def populate_list(file):
"""Takes fastq file and returns list with total quality score by index and total number of lines
in the file as an int"""
with gzip.open(file, "rt") as fh:
ls = []
init_list(ls)
counter = 0
for line in fh:
line = line.strip()
counter += 1
if counter % 4 == 0:
for index, char in enumerate(line):
ls[index] += Bioinfo.convert_phred(char)
return ls, counter
my_list, num_lines = populate_list(file)
print("# Base Pair\tMean Quality Score")
for index, total in enumerate(my_list):
my_list[index] = total/(num_lines/4)
print(str(index) + "\t" + str(my_list[index]))
import matplotlib.pyplot as plt
plt.bar(x=range(args.length), height=my_list)
plt.title("Average Quality Score Based on Index Position")
plt.ylabel("Average Quality Score")
plt.xlabel("Index in Seqeunce")
plt.savefig("{}".format(args.output))