
Commit 5dcf01e

Added support for string tokens. Note: the parser freaks out, but the lexer works fine. Closes: #1

1 parent 6cf349f commit 5dcf01e

File tree

3 files changed (+30 -5 lines changed)

dictionary.py

+5 -1
@@ -10,6 +10,7 @@ class Dictionary:
     FLOAT = 'FLOAT'
     CELL = 'CELL'
     PLUS = 'PLUS'
+    STRING = 'STR'
     MINUS = 'MINUS'
     MULTIPLICATION = 'MULT'
     DIVISION = 'DIV'
@@ -29,6 +30,8 @@ class Dictionary:
     LESS_THAN_OR_EQUAL_TO = '<='

     multi_word_operator_parts = ['is', 'not', 'equal', 'to', 'greater', 'or', 'less', 'than']
+
+    # TODO: Can this be optimized?
     multi_word_operators = {
         'is equal to': EQUAL_TO,
         'is not equal to': NOT_EQUAL_TO,
@@ -38,7 +41,7 @@ class Dictionary:
         'is less than or equal to': LESS_THAN_OR_EQUAL_TO
     }

-    operators = {
+    arithmetic_operators = {
         '+': Token(PLUS),
         '-': Token(MINUS),
         '*': Token(MULTIPLICATION),
@@ -49,6 +52,7 @@ class Dictionary:
         'is': Token(ASSIGNMENT)
     }

+    # TODO: Isn't this redundant?
     multi_word_operators_dictionary = {
         'is equal to': Token(EQUAL_TO),
         'is not equal to': Token(NOT_EQUAL_TO),

lexer.py

+21 -3
@@ -39,11 +39,14 @@ def tokenize(self):
             if self.current_character in Dictionary.NUMERIC_CHARACTERS:
                 tokens.append(self.digit_tokenize())
             # Arithmetic operators (single symbols)
-            elif self.current_character in Dictionary.operators:
-                tokens.append(Dictionary.operators[self.current_character])
+            elif self.current_character in Dictionary.arithmetic_operators:
+                tokens.append(Dictionary.arithmetic_operators[self.current_character])
             # Keywords, identifiers and multi-word arithmetic operators
             elif self.current_character in Dictionary.ALPHABETIC_CHARACTERS:
                 tokens.append(self.keyword_tokenize())
+            # Strings
+            elif self.current_character == "\"" or self.current_character == "\'":
+                tokens.append(self.string_tokenize())
             # White spaces and escape characters
             elif self.current_character in Dictionary.escape_characters:
                 tokens.append(self.escape_tokenize())
@@ -180,4 +183,19 @@ def handle_excel_cell(self):

     # Tokenize white spaces and escape characters (newline and tab)
     def escape_tokenize(self):
-        return Dictionary.escape_characters.get(self.current_character)
+        return Dictionary.escape_characters.get(self.current_character)
+
+    # Generate string token (e.g. STR:'Hasta la vista, baby.' or STR:"Say hello to my little friend!")
+    def string_tokenize(self):
+        string = ""
+        quote_type = self.current_character  # Single or double quote
+        self.next_character()
+
+        while (self.current_character is not None) and (self.current_character != quote_type):
+            string += self.current_character
+            self.next_character()
+
+        # Add opening and closing quotes to string
+        string = quote_type + string + quote_type
+
+        return Token(Dictionary.STRING, string)
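
For context on what the new string_tokenize produces, here is a minimal, self-contained sketch of the same quote-matching loop. The scan_string helper and its arguments are illustrative only and are not part of the repository; the point is that the resulting lexeme keeps its surrounding quotes, which is the value the STR token carries.

# Illustrative sketch only: mirrors the loop in string_tokenize above,
# rewritten as a free function over a plain source string (hypothetical names).
def scan_string(source, pos):
    quote_type = source[pos]  # opening quote: ' or "
    pos += 1
    chars = []
    # Consume characters until the matching quote or the end of input
    while pos < len(source) and source[pos] != quote_type:
        chars.append(source[pos])
        pos += 1
    # Keep the opening and closing quotes, as string_tokenize does
    return quote_type + "".join(chars) + quote_type

# Example, using one of the new lines in text_input.txt:
print(scan_string('x is "Skynet status: online"', 5))
# -> "Skynet status: online"  (the lexer wraps this value in Token(Dictionary.STRING, ...))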

text_input.txt

+4 -1
@@ -1,2 +1,5 @@
 if 2 + 40 is equal to 42
-x is 69
+x is "Skynet status: online"
+y is 'Oh shit!'
+
+cell R2D2 is 1337
