-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDNA_mRNA_Protein.py
128 lines (123 loc) · 4.3 KB
/
DNA_mRNA_Protein.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# ---------- About this Program ----------
"""
Name: DNA to mRNA to Protein
Creator: Kapilesh Pennichetty
Description: DNA to mRNA to Protein is a program created by Kapilesh Pennichetty to assist in converting a DNA sequence to an mRNA sequence and amino acids (protein) through the processes of transcription and translation. This program takes user input (DNA sequence) via the Python shell, performs transcription and translation, and returns the corresponding amino acid and mRNA codon as output.
"""
# ---------- Import Statements ----------
import sys
# ---------- Dictionaries for Transcription and Translation ----------
# DNA template bases and, position for position, the mRNA base each one is
# transcribed to. A space is paired with a space so that it maps to itself.
_DNA_BASES = ("A", "T", "C", "G", " ")
_MRNA_BASES = ("U", "A", "G", "C", " ")

# Lookup table: single DNA character -> single mRNA character.
transcription = dict(zip(_DNA_BASES, _MRNA_BASES))
# Third-position bases, in the column order used by every row of the table.
_CODON_BASES = "UCAG"

# Amino acids grouped by the first two bases of the mRNA codon. Within each
# row, entry k belongs to the codon whose third base is _CODON_BASES[k].
_AMINO_ACIDS_BY_PREFIX = {
    "UU": ("Phenylalanine", "Phenylalanine", "Leucine", "Leucine"),
    "UC": ("Serine", "Serine", "Serine", "Serine"),
    "UA": ("Tyrosine", "Tyrosine", "STOP", "STOP"),
    "UG": ("Cysteine", "Cysteine", "STOP", "Tryptophan"),
    "CU": ("Leucine", "Leucine", "Leucine", "Leucine"),
    "CC": ("Proline", "Proline", "Proline", "Proline"),
    "CA": ("Histidine", "Histidine", "Glutamine", "Glutamine"),
    "CG": ("Arginine", "Arginine", "Arginine", "Arginine"),
    "AU": ("Isoleucine", "Isoleucine", "Isoleucine", "Methionine (START)"),
    "AC": ("Threonine", "Threonine", "Threonine", "Threonine"),
    "AA": ("Asparagine", "Asparagine", "Lysine", "Lysine"),
    "AG": ("Serine", "Serine", "Arginine", "Arginine"),
    "GU": ("Valine", "Valine", "Valine", "Valine"),
    "GC": ("Alanine", "Alanine", "Alanine", "Alanine"),
    "GA": ("Aspartate", "Aspartate", "Glutamate", "Glutamate"),
    "GG": ("Glycine", "Glycine", "Glycine", "Glycine"),
}

# Lookup table: codon -> amino acid name (or "STOP").
# Each key is the str() of a three-element list of one-character bases,
# e.g. "['A', 'U', 'G']", so a stringified three-base list slice can be
# looked up directly.
translation = {
    str([first, second, third]): amino_acid
    for (first, second), row in _AMINO_ACIDS_BY_PREFIX.items()
    for third, amino_acid in zip(_CODON_BASES, row)
}
# ---------- User Input (DNA Sequence) ----------
# Read the DNA sequence from the shell and upper-case it so that lower-case
# input matches the upper-case keys of the transcription table.
input_prompt = input(
    "Please enter the DNA sequence to be converted to mRNA. Please make sure that your DNA sequence starts with the start codon for accurate results: "
)
user_input = input_prompt.upper()
dna_sequence = user_input
list_dna_sequence = list(dna_sequence)

# Transcription: map every character of the DNA sequence to its mRNA
# counterpart. Only a character missing from the table (KeyError) means the
# sequence is invalid, so only that exception is handled; anything else
# (for example Ctrl-C) propagates instead of being reported as bad input.
try:
    mRNA_sequence = "".join(transcription[base] for base in dna_sequence)
except KeyError:
    print(
        "Please make sure that your DNA sequence is valid. Re-run this program to re-enter a valid sequence."
    )
    sys.exit()

# Drop the spaces that were passed through by the table, then cut the mRNA
# into consecutive groups of three bases. Each group is stored as the str()
# of a list (e.g. "['A', 'U', 'G']") because that is the key format of the
# translation table. The last group may hold fewer than three bases when the
# sequence length is not a multiple of three.
modified_mRNA_sequence = mRNA_sequence.replace(" ", "")
list_mRNA_sequence = list(modified_mRNA_sequence)
formatted_mRNA_list = [
    str(list_mRNA_sequence[x:x + 3])
    for x in range(0, len(list_mRNA_sequence), 3)
]
# ---------- Output (Amino Acids and mRNA Codons) ----------
# Print one numbered line per codon: "<n> . <amino acid> <codon>".
# Translation ends at the first STOP codon.
#
# No try/except is wrapped around this loop on purpose: sys.exit() works by
# raising SystemExit, and a bare "except:" around it would catch that exit
# and print the invalid-sequence message after every successful translation
# that reaches a STOP codon.
print(
    "After transcription and translation, here are the amino acids and their respective mRNA codons:"
)
for amino_acid_number, codon in enumerate(formatted_mRNA_list, start=1):
    amino_acid = translation.get(codon)
    if amino_acid is None:
        # The codon is not in the table, e.g. a trailing group with fewer
        # than three bases. Report it as invalid input rather than printing
        # "None" as if it were an amino acid.
        print(
            "Please make sure that your DNA sequence is valid. Re-run this program to re-enter a valid sequence."
        )
        sys.exit()
    print(amino_acid_number, ".", amino_acid, codon)
    if amino_acid == "STOP":
        print("\n")
        sys.exit()