X-Git-Url: https://code.kerkeslager.com/?p=fur;a=blobdiff_plain;f=tokenization.py;h=819a0de9ea7fd5e07a9c2c0a5c7d93b55d6f0fda;hp=3131c3581d6597efbd8f9c7e12709bf95f00abee;hb=c7f381fbcb57ba1b7e33558a28fdb34f31234c07;hpb=730c0a3faa442985f6fdb73d91a1e6b9f72e6165 diff --git a/tokenization.py b/tokenization.py index 3131c35..819a0de 100644 --- a/tokenization.py +++ b/tokenization.py @@ -5,12 +5,19 @@ import util Token = collections.namedtuple( 'Token', - [ + ( 'type', 'match', + 'metadata', + ), +) + +NodeMetadata = collections.namedtuple( + 'NodeMetadata', + ( 'index', 'line', - ], + ), ) def _make_token_matcher(definition): @@ -26,22 +33,34 @@ def _make_token_matcher(definition): return ( True, index + len(match.group()), - Token(type=name, match=match.group(), index=index, line=line), + Token( + type=name, + match=match.group(), + metadata=NodeMetadata( + index=index, + line=line, + ), + ), ) return token_matcher - _TOKEN_MATCHERS = [ + ('keyword', r'(def|do|else|end|if|lambda)(?![a-z_])'), + ('open_bracket', r'\['), + ('close_bracket', r'\]'), ('open_parenthese', r'\('), ('close_parenthese', r'\)'), ('comma', r','), + ('colon', r':'), + ('period', r'\.'), ('integer_literal', r'\d+'), - ('symbol', r'[a-z]+'), + ('symbol', r'[a-z_]+'), ('single_quoted_string_literal', r"'.*?'"), - ('equality_level_operator', r'(<=|>=|==|!=|<|>)'), + ('double_quoted_string_literal', r'".*?"'), + ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'), ('assignment_operator', r'='), - ('addition_level_operator', r'(\+|-)'), + ('addition_level_operator', r'(\+\+|\+|-)'), ('multiplication_level_operator', r'(\*|//|%)'), ('newline', r'\n'), ] @@ -58,6 +77,12 @@ def tokenize(source): index += 1 continue + if source[index] == '#': + while index < len(source) and source[index] != '\n': + index += 1 + + continue + success = False for matcher in _TOKEN_MATCHERS: @@ -205,7 +230,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('=='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='==', index=0, line=1, @@ -216,7 +241,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('>='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='>=', index=0, line=1, @@ -227,7 +252,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('<='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='<=', index=0, line=1, @@ -238,7 +263,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('>'), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='>', index=0, line=1, @@ -249,7 +274,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('<'), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='<', index=0, line=1, @@ -260,7 +285,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('!='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='!=', index=0, line=1,