Skip to content

Commit

Permalink
Improve lexical scanner to not use O(n^2) space when scanning.
Browse files (browse the repository at this point in the history)
  • Loading branch information
cpressey committed Feb 6, 2022
1 parent c1d97e9 commit dfddf83
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions src/castile/parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,29 +27,33 @@ def __init__(self, text):
self.text = text
self.token = None
self.type = None
self.pos = 0
self.scan()
# for parser...
self.locals = None

# ### SCANNER ### #

def near_text(self, length=10):
return self.text[self.pos:self.pos + length]

def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
pattern = r'^(' + pattern + r')(.*?)$'
match = re.match(pattern, self.text, re.DOTALL)
pattern = r'(' + pattern + r')'
regexp = re.compile(pattern, flags=re.DOTALL)
match = regexp.match(self.text, pos=self.pos)
if not match:
return False
else:
self.type = type
self.token = match.group(token_group)
self.text = match.group(rest_group)
# print(self.type, self.token)
self.pos += len(match.group(0))
return True

def scan(self):
self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
while self.text.startswith('/*'):
self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment')
if not self.text:
while self.scan_pattern(r'\/\*.*?\*\/[ \t\n\r]*', 'comment'):
self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
if self.pos >= len(self.text):
self.token = None
self.type = 'EOF'
return
Expand Down Expand Up @@ -91,7 +95,9 @@ def expect(self, token):
self.scan()
else:
raise CastileSyntaxError(
"Expected '%s', but found '%s'" % (token, self.token)
"Expected '%s', but found '%s' (near '%s')" % (
token, self.token, self.near_text()
)
)

def expect_type(self, type):
Expand All @@ -112,7 +118,9 @@ def on_type(self, type):
def check_type(self, type):
if not self.type == type:
raise CastileSyntaxError(
"Expected %s, but found %s ('%s')" % (type, self.type, self.token)
"Expected %s, but found %s ('%s') (near '%s')" % (
type, self.type, self.token, self.near_text()
)
)

def consume(self, token):
Expand Down

0 comments on commit dfddf83

Please sign in to comment.