X-Git-Url: https://code.kerkeslager.com/?a=blobdiff_plain;f=tokenization.py;h=a736912603e0832efc974b07252cf03ebe6fdc38;hb=0d43b38c70255f1bdb69ceede2b67ef4b293468b;hp=e6cad0a0bc7c3ac035294f6d0720c5e46a0a56e4;hpb=a859f78651b4da2d31890b9c7b01a431fa7a93f6;p=fur diff --git a/tokenization.py b/tokenization.py index e6cad0a..a736912 100644 --- a/tokenization.py +++ b/tokenization.py @@ -39,10 +39,11 @@ _TOKEN_MATCHERS = [ ('integer_literal', r'\d+'), ('symbol', r'[a-z]+'), ('single_quoted_string_literal', r"'.*?'"), - ('equality_level_operator', r'(<=|>=|==|!=|<|>)'), + ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'), + ('assignment_operator', r'='), ('addition_level_operator', r'(\+|-)'), ('multiplication_level_operator', r'(\*|//|%)'), - ('assignment_operator', r'='), + ('newline', r'\n'), ] _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS)) @@ -67,11 +68,13 @@ def tokenize(source): break if not success: - raise Exception('Unexpected character "{}"'.format(source[index])) + raise Exception('Unexpected character "{}" on line {}'.format( + source[index], + line, + )) - while index < len(source) and source[index] in set(['\n']): + if token.type == 'newline': line += 1 - index += 1 if __name__ == '__main__': import unittest @@ -202,7 +205,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('=='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='==', index=0, line=1, @@ -213,7 +216,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('>='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='>=', index=0, line=1, @@ -224,7 +227,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('<='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='<=', index=0, line=1, @@ -235,7 +238,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('>'), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='>', index=0, line=1, @@ -246,7 +249,7 @@ if __name__ == '__main__': self.assertEqual( tokenize('<'), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='<', index=0, line=1, @@ -257,19 +260,19 @@ if __name__ == '__main__': self.assertEqual( tokenize('!='), (Token( - type='equality_level_operator', + type='comparison_level_operator', match='!=', index=0, line=1, ),), ) - def test_handles_trailing_newline(self): + def test_tokenizes_newline(self): self.assertEqual( - tokenize('print\n'), + tokenize('\n'), (Token( - type='symbol', - match='print', + type='newline', + match='\n', index=0, line=1, ),), @@ -296,6 +299,12 @@ if __name__ == '__main__': index=0, line=1, ), + Token( + type='newline', + match='\n', + index=5, + line=1, + ), Token( type='open_parenthese', match='(',