# determine what token to return. I'm not sure how fragile this is long-term,
# but it seems to have been the right solution for a number of small problems
# along the way.

# Pattern lists already filtered for a given restriction set. The dict lives at
# module level, so it is shared by every scanner instance; the key is only the
# frozenset of allowed token names, which assumes all instances carry an
# equivalent self.patterns table.
_restrict_cache = {}


class SpitfireScanner(spitfire.compiler.parser._SpitfireParserScanner):
    """Scanner that takes the first matching pattern instead of the longest."""

    def token(self, i, restrict=0):
        """Return the i'th token, scanning a new one if i is one past the end.

        restrict is an iterable of acceptable token names; a false value
        (the default 0) places no restriction on the token.

        Raises:
            NotImplementedError: token i was scanned under a restriction set
                that is not a superset of restrict.
            yappsrt.NoMoreTokens: token i is unavailable, or exactly one of
                the stored and requested restrictions is empty.
        """
        # NOTE(review): this guard sits between two diff hunks and was not
        # visible; it is reconstructed from the docstring ("if i is one past
        # the end, then scan for another token") - confirm against the file.
        if i == len(self.tokens):
            self.scan(restrict)

        current = restriction = None
        if i < len(self.tokens):
            current = self.tokens[i]
            # Make sure the restriction is more restricted
            restriction = self.restrictions[i]
            if restrict and restriction:
                if not restriction.issuperset(restrict):
                    raise NotImplementedError(
                        "Unimplemented: restriction set changed",
                        restrict, restriction)
                return current
            elif not restrict and not restriction:
                return current

        # Only index tokens/restrictions when i is in range (done above), so
        # an out-of-range i reports NoMoreTokens rather than an IndexError
        # raised while building the exception arguments.
        raise yappsrt.NoMoreTokens(
            i, len(self.tokens), current, restrict, restriction, self.tokens)

    def scan(self, restrict):
        """Scan one token at self.pos and record it.

        Appends the token tuple (start, end, name, text) to self.tokens and
        the frozen restriction set to self.restrictions, then advances
        self.pos past the matched text.

        Raises:
            yappsrt.SyntaxError: none of the candidate patterns match at
                self.pos.
        """
        # token() defaults restrict to 0, which frozenset() cannot iterate;
        # treat any false value as "no restriction".
        restrict = frozenset(restrict or ())

        # Cache the list of patterns we check to avoid unnecessary iteration
        try:
            patterns = _restrict_cache[restrict]
        except KeyError:
            patterns = [pair for pair in self.patterns
                        if not restrict or pair[0] in restrict]
            _restrict_cache[restrict] = patterns

        text, start = self.input, self.pos
        for name, regexp in patterns:
            m = regexp.match(text, start)
            if m:
                # msolo: use the first match, not the 'best'
                break
        else:
            # If we didn't find anything, raise an error
            msg = "Bad Token"
            if restrict:
                # Sorted so the message does not depend on set ordering.
                msg = "Trying to find one of " + ', '.join(sorted(restrict))
            raise yappsrt.SyntaxError(start, msg)

        # Create a token with this data
        matched = m.group(0)
        end = start + len(matched)
        token = (start, end, name, matched)
        self.pos = end
        # Only add this token if it's not in the list
        # (to prevent looping)
        if not self.tokens or token != self.tokens[-1]:
            self.tokens.append(token)
            self.restrictions.append(restrict)