Skip to content

Commit c884600

Browse files
committed
cleaned up lexer a little bit
1 parent a975ad9 commit c884600

File tree

2 files changed

+26
-32
lines changed

2 files changed

+26
-32
lines changed

dictionary.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,6 @@ class Dictionary:
44
NUMERIC_CHARACTERS = '0123456789.'
55
ALPHABETIC_CHARACTERS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
66
KEYWORDS = ['if', 'else if', 'else', 'return']
7-
multi_word_operator_parts = ['is', 'not', 'equal', 'to', 'greater', 'or', 'less', 'than']
8-
multi_word_operators = ['is equal to',
9-
'is not equal to',
10-
'is greater than',
11-
'is less than',
12-
'is greater than or equal to',
13-
'is less than or equal to']
147

158
# TOKEN TYPES
169
INTEGER = 'INT'
@@ -34,6 +27,16 @@ class Dictionary:
3427
GREATER_THAN_OR_EQUAL_TO = '>='
3528
LESS_THAN_OR_EQUAL_TO = '<='
3629

30+
multi_word_operator_parts = ['is', 'not', 'equal', 'to', 'greater', 'or', 'less', 'than']
31+
multi_word_operators = {
32+
'is equal to': EQUAL_TO,
33+
'is not equal to': NOT_EQUAL_TO,
34+
'is greater than': GREATER_THAN,
35+
'is less than': LESS_THAN,
36+
'is greater than or equal to': GREATER_THAN_OR_EQUAL_TO,
37+
'is less than or equal to': LESS_THAN_OR_EQUAL_TO
38+
}
39+
3740
operators = {
3841
'+': Token(PLUS),
3942
'-': Token(MINUS),

lexer.py

+16-25
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,6 @@
88
# 'is greater than or equal to' is the longest multi-word operator
99
MAX_NUMBER_OF_OPERATOR_WORDS = 6
1010

11-
# TODO: These imports could potentially be cleaned up
12-
OPERATOR_DICTIONARY = Dictionary.operators
13-
MULTI_WORD_OPERATOR_DICTIONARY = Dictionary.multi_word_operators_dictionary
14-
ESCAPE_DICTIONARY = Dictionary.escape_characters
15-
FLOAT = Dictionary.FLOAT
16-
INTEGER = Dictionary.INTEGER
17-
NUMERIC_CHARACTERS = set(Dictionary.NUMERIC_CHARACTERS)
18-
ALPHABETIC_CHARACTERS = set(Dictionary.ALPHABETIC_CHARACTERS)
19-
2011
logger = logging.getLogger(__name__)
2112

2213
class Lexer:
@@ -45,16 +36,16 @@ def tokenize(self):
4536
# Loop through the input string and tokenize accordingly
4637
while self.current_character is not None:
4738
# Digits
48-
if self.current_character in NUMERIC_CHARACTERS:
39+
if self.current_character in Dictionary.NUMERIC_CHARACTERS:
4940
tokens.append(self.digit_tokenize())
5041
# Arithmetic operators (single symbols)
51-
elif self.current_character in OPERATOR_DICTIONARY:
52-
tokens.append(OPERATOR_DICTIONARY[self.current_character])
42+
elif self.current_character in Dictionary.operators:
43+
tokens.append(Dictionary.operators[self.current_character])
5344
# Keywords, identifiers and multi-word operators
54-
elif self.current_character in ALPHABETIC_CHARACTERS:
45+
elif self.current_character in Dictionary.ALPHABETIC_CHARACTERS:
5546
tokens.append(self.keyword_tokenize())
5647
# White spaces and escape characters
57-
elif self.current_character in ESCAPE_DICTIONARY:
48+
elif self.current_character in Dictionary.escape_characters:
5849
tokens.append(self.escape_tokenize())
5950
else:
6051
logger.error(f"Illegal character: \'{self.current_character}\' at {self.position}")
@@ -70,7 +61,7 @@ def digit_tokenize(self):
7061
numeral_string = ""
7162

7263
# Loop through the input string until a non-numeric character (anything other than a digit or '.') is found
73-
while (self.current_character is not None) and (self.current_character in NUMERIC_CHARACTERS):
64+
while (self.current_character is not None) and (self.current_character in Dictionary.NUMERIC_CHARACTERS):
7465
if self.current_character == ".":
7566
# Break out if value (numeral_string) already is a float
7667
if "." in numeral_string:
@@ -82,27 +73,27 @@ def digit_tokenize(self):
8273
numeral_string += self.current_character
8374

8475
# Break out if next character is not a digit
85-
if self.peek() not in NUMERIC_CHARACTERS:
76+
if self.peek() not in Dictionary.NUMERIC_CHARACTERS:
8677
break
8778
else:
8879
self.next_character()
8980

9081
# Return integer or float token
9182
if is_float:
92-
return Token(FLOAT, float(numeral_string))
83+
return Token(Dictionary.FLOAT, float(numeral_string))
9384
else:
94-
return Token(INTEGER, int(numeral_string))
85+
return Token(Dictionary.INTEGER, int(numeral_string))
9586

9687
# Tokenize keywords, identifiers and multi-word operators
9788
def keyword_tokenize(self):
9889
alphanumerical_string = ""
9990

10091
# Loop through the input string to collect a sequence of alphanumeric characters
101-
while (self.current_character is not None) and (self.current_character in ALPHABETIC_CHARACTERS or self.current_character in NUMERIC_CHARACTERS):
92+
while (self.current_character is not None) and (self.current_character in Dictionary.ALPHABETIC_CHARACTERS or self.current_character in Dictionary.NUMERIC_CHARACTERS):
10293
alphanumerical_string += self.current_character
10394

10495
# Break out on white spaces and escape characters
105-
if self.peek() in ESCAPE_DICTIONARY:
96+
if self.peek() in Dictionary.escape_characters:
10697
break
10798
else:
10899
self.next_character()
@@ -144,8 +135,8 @@ def handle_multi_word_operator(self, alphanumerical_string):
144135

145136
# Return either multi-word operator or assignment token (single-word operator)
146137
if alphanumerical_string in Dictionary.multi_word_operators:
147-
#print(f"Returning token as multi-word operator: '{alphanumerical_string}'")
148-
return Token(MULTI_WORD_OPERATOR_DICTIONARY[alphanumerical_string])
138+
print(f"Returning token as multi-word operator: '{alphanumerical_string}'")
139+
return Token(Dictionary.multi_word_operators[alphanumerical_string])
149140
elif alphanumerical_string == "is":
150141
return Token(Dictionary.ASSIGNMENT)
151142
else:
@@ -163,7 +154,7 @@ def peek_word_ahead(self):
163154
current_index += 1
164155

165156
# Loop until next white space or escape character is found
166-
while (current_index < len(self.input_string)) and (self.input_string[current_index] not in ESCAPE_DICTIONARY):
157+
while (current_index < len(self.input_string)) and (self.input_string[current_index] not in Dictionary.escape_characters):
167158
peeked_word += self.input_string[current_index]
168159
current_index += 1
169160

@@ -178,7 +169,7 @@ def advance_n(self, n):
178169

179170
# Tokenize white spaces and escape characters (newline and tab)
180171
def escape_tokenize(self):
181-
return ESCAPE_DICTIONARY.get(self.current_character)
172+
return Dictionary.escape_characters.get(self.current_character)
182173

183174

184175

@@ -246,7 +237,7 @@ def peek_until_escape_character(self):
246237
while True:
247238
temp_string = self.peek()
248239
249-
if temp_string in ESCAPE_DICTIONARY:
240+
if temp_string in Dictionary.escape_characters or temp_string == " ":
250241
self.next_character()
251242
break
252243

0 commit comments

Comments
 (0)