8
8
# 'is greater than or equal to' is the longest multi-word operator
9
9
MAX_NUMBER_OF_OPERATOR_WORDS = 6
10
10
11
- # TODO: These imports could potentially be cleaned up
12
- OPERATOR_DICTIONARY = Dictionary .operators
13
- MULTI_WORD_OPERATOR_DICTIONARY = Dictionary .multi_word_operators_dictionary
14
- ESCAPE_DICTIONARY = Dictionary .escape_characters
15
- FLOAT = Dictionary .FLOAT
16
- INTEGER = Dictionary .INTEGER
17
- NUMERIC_CHARACTERS = set (Dictionary .NUMERIC_CHARACTERS )
18
- ALPHABETIC_CHARACTERS = set (Dictionary .ALPHABETIC_CHARACTERS )
19
-
20
11
logger = logging .getLogger (__name__ )
21
12
22
13
class Lexer :
@@ -45,16 +36,16 @@ def tokenize(self):
45
36
# Loop through the input string and tokenize accordingly
46
37
while self .current_character is not None :
47
38
# Digits
48
- if self .current_character in NUMERIC_CHARACTERS :
39
+ if self .current_character in Dictionary . NUMERIC_CHARACTERS :
49
40
tokens .append (self .digit_tokenize ())
50
41
# Arithmetic operators (single symbols)
51
- elif self .current_character in OPERATOR_DICTIONARY :
52
- tokens .append (OPERATOR_DICTIONARY [self .current_character ])
42
+ elif self .current_character in Dictionary . operators :
43
+ tokens .append (Dictionary . operators [self .current_character ])
53
44
# Keywords, identifiers and multi-word arithmetic operators
54
- elif self .current_character in ALPHABETIC_CHARACTERS :
45
+ elif self .current_character in Dictionary . ALPHABETIC_CHARACTERS :
55
46
tokens .append (self .keyword_tokenize ())
56
47
# White spaces and escape characters
57
- elif self .current_character in ESCAPE_DICTIONARY :
48
+ elif self .current_character in Dictionary . escape_characters :
58
49
tokens .append (self .escape_tokenize ())
59
50
else :
60
51
logger .error (f"Illegal character: \' { self .current_character } \' at { self .position } " )
@@ -70,7 +61,7 @@ def digit_tokenize(self):
70
61
numeral_string = ""
71
62
72
63
# Loop over the input string until a non-numeric character is found
73
- while (self .current_character is not None ) and (self .current_character in NUMERIC_CHARACTERS ):
64
+ while (self .current_character is not None ) and (self .current_character in Dictionary . NUMERIC_CHARACTERS ):
74
65
if self .current_character == "." :
75
66
# Break out if the value (numeral_string) already contains a decimal point
76
67
if "." in numeral_string :
@@ -82,27 +73,27 @@ def digit_tokenize(self):
82
73
numeral_string += self .current_character
83
74
84
75
# Break out if the next character is not a numeric character
85
- if self .peek () not in NUMERIC_CHARACTERS :
76
+ if self .peek () not in Dictionary . NUMERIC_CHARACTERS :
86
77
break
87
78
else :
88
79
self .next_character ()
89
80
90
81
# Return integer or float token
91
82
if is_float :
92
- return Token (FLOAT , float (numeral_string ))
83
+ return Token (Dictionary . FLOAT , float (numeral_string ))
93
84
else :
94
- return Token (INTEGER , int (numeral_string ))
85
+ return Token (Dictionary . INTEGER , int (numeral_string ))
95
86
96
87
# Tokenize keywords, identifiers and multi-word operators
97
88
def keyword_tokenize (self ):
98
89
alphanumerical_string = ""
99
90
100
91
# Loop over the input string, collecting a sequence of alphabetic or numeric characters
101
- while (self .current_character is not None ) and (self .current_character in ALPHABETIC_CHARACTERS or self .current_character in NUMERIC_CHARACTERS ):
92
+ while (self .current_character is not None ) and (self .current_character in Dictionary . ALPHABETIC_CHARACTERS or self .current_character in Dictionary . NUMERIC_CHARACTERS ):
102
93
alphanumerical_string += self .current_character
103
94
104
95
# Break out on white spaces and escape characters
105
- if self .peek () in ESCAPE_DICTIONARY :
96
+ if self .peek () in Dictionary . escape_characters :
106
97
break
107
98
else :
108
99
self .next_character ()
@@ -144,8 +135,8 @@ def handle_multi_word_operator(self, alphanumerical_string):
144
135
145
136
# Return either multi-word operator or assignment token (single-word operator)
146
137
if alphanumerical_string in Dictionary .multi_word_operators :
147
- # print(f"Returning token as multi-word operator: '{alphanumerical_string}'")
148
- return Token (MULTI_WORD_OPERATOR_DICTIONARY [alphanumerical_string ])
138
+ print (f"Returning token as multi-word operator: '{ alphanumerical_string } '" )
139
+ return Token (Dictionary . multi_word_operators [alphanumerical_string ])
149
140
elif alphanumerical_string == "is" :
150
141
return Token (Dictionary .ASSIGNMENT )
151
142
else :
@@ -163,7 +154,7 @@ def peek_word_ahead(self):
163
154
current_index += 1
164
155
165
156
# Loop until next white space or escape character is found
166
- while (current_index < len (self .input_string )) and (self .input_string [current_index ] not in ESCAPE_DICTIONARY ):
157
+ while (current_index < len (self .input_string )) and (self .input_string [current_index ] not in Dictionary . escape_characters ):
167
158
peeked_word += self .input_string [current_index ]
168
159
current_index += 1
169
160
@@ -178,7 +169,7 @@ def advance_n(self, n):
178
169
179
170
# Tokenize white spaces and escape characters (newline and tab)
180
171
def escape_tokenize (self ):
181
- return ESCAPE_DICTIONARY .get (self .current_character )
172
+ return Dictionary . escape_characters .get (self .current_character )
182
173
183
174
184
175
@@ -246,7 +237,7 @@ def peek_until_escape_character(self):
246
237
while True:
247
238
temp_string = self.peek()
248
239
249
- if temp_string in ESCAPE_DICTIONARY :
240
+ if temp_string in Dictionary.escape_characters or temp_string == " " :
250
241
self.next_character()
251
242
break
252
243
0 commit comments