-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathformula.py
104 lines (96 loc) · 4.21 KB
/
formula.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
class FormulaError(ValueError):
    """Error raised by parse_formula for an invalid chemical formula.

    This is a plain ValueError subclass that adds no behavior of
    its own. parse_formula raises it with three arguments: a
    message, the formula text, and the index in the formula where
    the problem was detected.
    """
def parse_formula(formula, periodic_table_dict):
    """Convert a chemical formula into a list of element quantities.

    The formula is scanned from left to right. A parenthesized
    group may be nested inside another group and may be followed
    by a quantity, which multiplies every element in the group.
    Quantities of a symbol that appears more than once are added
    together. For example, this function will convert
        "H2O" to [("H", 2), ("O", 1)] and
        "PO4H2(CH2)12CH3" to
            [("P", 1), ("O", 4), ("H", 29), ("C", 13)]

    Parameters
        formula is a string that contains a chemical formula
        periodic_table_dict is the compound dictionary returned
            from make_periodic_table; only its keys (the element
            symbols) are used here
    Return: a list of (symbol, quantity) tuples like this
        [("Fe", 2), ("O", 3)]. An empty formula gives [].
    Raises: FormulaError(message, formula, index) when the
        formula contains an unknown element symbol, a quantity
        that begins with zero, a quantity that does not follow
        an element symbol or close parenthesis, an unmatched
        parenthesis, or any character other than ASCII letters,
        ASCII digits, and parentheses.
    """
    # NOTE: asserts are skipped when Python runs with -O. They are
    # kept as asserts so the exception type seen by existing
    # callers (AssertionError) does not change.
    assert isinstance(formula, str), \
        "wrong data type for parameter formula; " \
        f"formula is a {type(formula)} but must be a string"
    assert isinstance(periodic_table_dict, dict), \
        "wrong data type for parameter periodic_table_dict; " \
        f"periodic_table_dict is a {type(periodic_table_dict)} " \
        "but must be a dictionary"

    def is_ascii_digit(ch):
        """Return True only for the characters "0" through "9"."""
        # str.isdecimal() is also true for decimal digits of other
        # scripts (for example the fullwidth zero). Those digits
        # would get past the leading-zero check in parse_quant and
        # int() would still convert them, so they are rejected.
        return "0" <= ch <= "9"

    def parse_quant(formula, index):
        """Read the optional quantity that starts at index.

        Return a (quantity, index) pair where index is the
        position just after the quantity. When there is no digit
        at index, the quantity is 1 and index is unchanged.
        """
        if index >= len(formula) or not is_ascii_digit(formula[index]):
            return 1, index
        if formula[index] == "0":
            raise FormulaError("invalid formula, "
                "quantity begins with zero (0), perhaps "
                "you meant to type capital O for Oxygen "
                "instead of zero", formula, index)
        start = index
        index += 1
        while index < len(formula) and is_ascii_digit(formula[index]):
            index += 1
        return int(formula[start:index]), index

    def parse_r(formula, index, level):
        """Parse elements from index until the current group ends.

        level is the parenthesis nesting depth; it is 0 outside
        of all parentheses. Return an (elem_dict, index) pair
        where elem_dict maps each symbol to its quantity and
        index is the position just after the last character
        consumed.
        """
        start_index = index
        closed = False  # becomes True when this group's ")" is read
        elem_dict = {}
        while index < len(formula):
            ch = formula[index]
            if ch == "(":
                # Parse the nested group, then scale its contents
                # by the quantity that follows the ")".
                group_dict, index = parse_r(formula, index + 1, level + 1)
                quant, index = parse_quant(formula, index)
                for symbol, count in group_dict.items():
                    prev = elem_dict.get(symbol, 0)
                    elem_dict[symbol] = prev + count * quant
            elif ch.isalpha():
                # Try a two-letter symbol first so that "Fe" is
                # not read as "F" followed by an unknown "e".
                symbol = formula[index:index + 2]
                if symbol not in periodic_table_dict:
                    symbol = ch
                    if symbol not in periodic_table_dict:
                        raise FormulaError("invalid formula; "
                            f"unknown element symbol: {symbol}",
                            formula, index)
                index += len(symbol)
                quant, index = parse_quant(formula, index)
                elem_dict[symbol] = elem_dict.get(symbol, 0) + quant
            elif ch == ")":
                if level == 0:
                    raise FormulaError("invalid formula; "
                        "unmatched close parenthesis",
                        formula, index)
                index += 1
                closed = True
                break
            else:
                if is_ascii_digit(ch):
                    # Decimal digit not preceded by an
                    # element symbol or close parenthesis
                    message = "invalid formula"
                else:
                    # Illegal character: [^()0-9a-zA-Z]
                    message = "invalid formula; " + \
                        f"illegal character: {ch}"
                raise FormulaError(message, formula, index)
        if level > 0 and not closed:
            # The input ended inside a group. start_index - 1 is
            # the position of the "(" that opened it.
            raise FormulaError("invalid formula; "
                "unmatched open parenthesis",
                formula, start_index - 1)
        return elem_dict, index

    # Return the compound list of element symbols and
    # quantities. Each element in the compound list
    # will be a tuple in this form: ("symbol", quantity)
    elem_dict, _ = parse_r(formula, 0, 0)
    return list(elem_dict.items())