-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnumbers_parser.py
101 lines (76 loc) · 3.89 KB
/
numbers_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Lookup tables mapping Russian cardinal numeral words to their integer values,
# grouped by the positional role the word plays inside a number below 1000.

# Two accepted words for zero.
WORD_TO_NUMBERS_ZERO = {
    'ноль': 0,
    'нуль': 0,
}

# Units.
WORD_TO_NUMBERS_1_9 = {
    'один': 1,
    'два': 2,
    'три': 3,
    'четыре': 4,
    'пять': 5,
    'шесть': 6,
    'семь': 7,
    'восемь': 8,
    'девять': 9,
}

# Ten and the teens. Eleven is listed under two spellings that both map to 11
# (presumably to tolerate the single-'н' misspelling in the input).
WORD_TO_NUMBERS_10_19 = {
    'десять': 10,
    'одиннадцать': 11,
    'одинадцать': 11,
    'двенадцать': 12,
    'тринадцать': 13,
    'четырнадцать': 14,
    'пятнадцать': 15,
    'шестнадцать': 16,
    'семнадцать': 17,
    'восемнадцать': 18,
    'девятнадцать': 19,
}

# Round tens from twenty to ninety.
WORD_TO_NUMBERS_20_90 = {
    'двадцать': 20,
    'тридцать': 30,
    'сорок': 40,
    'пятьдесят': 50,
    'шестьдесят': 60,
    'семьдесят': 70,
    'восемьдесят': 80,
    'девяносто': 90,
}

# Round hundreds from one hundred to nine hundred.
WORD_TO_NUMBERS_100_900 = {
    'сто': 100,
    'двести': 200,
    'триста': 300,
    'четыреста': 400,
    'пятьсот': 500,
    'шестьсот': 600,
    'семьсот': 700,
    'восемьсот': 800,
    'девятьсот': 900,
}

# Every word recognised as (part of) a number: the union of all table keys.
NUMBER_WORDS = set().union(
    WORD_TO_NUMBERS_ZERO,
    WORD_TO_NUMBERS_1_9,
    WORD_TO_NUMBERS_10_19,
    WORD_TO_NUMBERS_20_90,
    WORD_TO_NUMBERS_100_900,
)
class NumbersParser:
    """Collapse runs of Russian numeral words (0-999) into decimal strings."""

    @staticmethod
    def parse(tokens):
        """Replace spelled-out numbers in *tokens* with their digits.

        Consecutive numeral words are merged while they can still form one
        number of the shape ``hundreds + tens + units`` (or
        ``hundreds + teen``); e.g. ``['сто', 'двадцать', 'пять']`` becomes
        ``['125']``.  A numeral word that cannot extend the number being
        built closes that number and starts a new one, so
        ``['двадцать', 'тридцать', 'пять']`` becomes ``['20', '35']``.
        Tokens that are not in ``NUMBER_WORDS`` are passed through in place
        and always terminate the number being built.

        Args:
            tokens: Iterable of hashable tokens; numeral words are matched
                by exact membership in ``NUMBER_WORDS``.

        Returns:
            A list in which every element has been converted with ``str()``:
            merged numbers as decimal strings and all other tokens in their
            original order.
        """
        output_tokens = []
        # Value of the number currently being assembled, or None when no
        # number is open.
        current_number = None
        # Lowest decimal position already filled in current_number:
        # 100 after a hundreds word, 10 after a tens word.
        last_number_order = None

        def reset_state():
            """Emit the number being assembled and clear the parser state."""
            nonlocal current_number, last_number_order
            output_tokens.append(current_number)
            current_number = None
            last_number_order = None

        for token in tokens:
            if token not in NUMBER_WORDS:
                # An ordinary word ends any number in progress.
                if current_number is not None:
                    reset_state()
                output_tokens.append(token)
                continue

            if current_number is None:
                # Zero, units and teens cannot be extended by a following
                # word, so they are emitted immediately; tens and hundreds
                # open a number that later words may extend.
                if token in WORD_TO_NUMBERS_ZERO:
                    output_tokens.append(WORD_TO_NUMBERS_ZERO[token])
                elif token in WORD_TO_NUMBERS_1_9:
                    output_tokens.append(WORD_TO_NUMBERS_1_9[token])
                elif token in WORD_TO_NUMBERS_10_19:
                    output_tokens.append(WORD_TO_NUMBERS_10_19[token])
                elif token in WORD_TO_NUMBERS_20_90:
                    current_number = WORD_TO_NUMBERS_20_90[token]
                    last_number_order = 10
                else:  # token in WORD_TO_NUMBERS_100_900
                    current_number = WORD_TO_NUMBERS_100_900[token]
                    last_number_order = 100
            elif token in WORD_TO_NUMBERS_ZERO:
                # Zero never combines with anything: close and emit it alone.
                reset_state()
                output_tokens.append(WORD_TO_NUMBERS_ZERO[token])
            elif token in WORD_TO_NUMBERS_1_9:
                # A unit completes both "tens + unit" and "hundreds + unit".
                current_number += WORD_TO_NUMBERS_1_9[token]
                reset_state()
            elif token in WORD_TO_NUMBERS_10_19:
                if last_number_order == 10:
                    # The tens slot is taken; the teen is a separate number.
                    reset_state()
                    output_tokens.append(WORD_TO_NUMBERS_10_19[token])
                else:  # last_number_order == 100
                    current_number += WORD_TO_NUMBERS_10_19[token]
                    reset_state()
            elif token in WORD_TO_NUMBERS_20_90:
                if last_number_order == 10:
                    # The tens slot is taken: close the pending number and
                    # open a new one so that a following unit can still
                    # attach to this tens word (mirrors the hundreds case).
                    reset_state()
                    current_number = WORD_TO_NUMBERS_20_90[token]
                    last_number_order = 10
                else:  # last_number_order == 100
                    current_number += WORD_TO_NUMBERS_20_90[token]
                    last_number_order = 10
            else:  # token in WORD_TO_NUMBERS_100_900
                # A hundreds word always starts a new number.
                reset_state()
                current_number = WORD_TO_NUMBERS_100_900[token]
                last_number_order = 100

        # Emit a number left open at the end of the input.
        if current_number is not None:
            reset_state()

        return [str(t) for t in output_tokens]