X-Git-Url: https://code.kerkeslager.com/?a=blobdiff_plain;f=tokenization.py;h=1a2653e3c0dc93d6c1208357d29d703b788eadac;hb=80efbcfcd42da6061d0e31add5cc1e6fb17b2f93;hp=f316e5e9fd19c7c5b6526f89e2dd0a35fdc48fca;hpb=f60d1b48bbf73c51d214c5ae5c22ea3cdee087c1;p=fur diff --git a/tokenization.py b/tokenization.py index f316e5e..1a2653e 100644 --- a/tokenization.py +++ b/tokenization.py @@ -31,16 +31,19 @@ def _make_token_matcher(definition): return token_matcher - _TOKEN_MATCHERS = [ + ('keyword', r'(def|end)(?![a-z_])'), ('open_parenthese', r'\('), ('close_parenthese', r'\)'), ('comma', r','), ('integer_literal', r'\d+'), - ('symbol', r'[a-z]+'), + ('symbol', r'[a-z_]+'), ('single_quoted_string_literal', r"'.*?'"), + ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'), + ('assignment_operator', r'='), ('addition_level_operator', r'(\+|-)'), - ('multiplication_level_operator', r'(\*|//|%)'), + ('multiplication_level_operator', r'(\*|//|%)'), + ('newline', r'\n'), ] _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS)) @@ -65,11 +68,13 @@ def tokenize(source): break if not success: - raise Exception('Unexpected character "{}"'.format(source[index])) + raise Exception('Unexpected character "{}" on line {}'.format( + source[index], + line, + )) - while index < len(source) and source[index] in set(['\n']): + if token.type == 'newline': line += 1 - index += 1 if __name__ == '__main__': import unittest @@ -185,13 +190,89 @@ if __name__ == '__main__': ),), ) + def test_tokenizes_assignment_operator(self): + self.assertEqual( + tokenize('='), + (Token( + type='assignment_operator', + match='=', + index=0, + line=1, + ),), + ) - def test_handles_trailing_newline(self): + def test_tokenizes_equality_operator(self): self.assertEqual( - tokenize('print\n'), + tokenize('=='), (Token( - type='symbol', - match='print', + type='comparison_level_operator', + match='==', + index=0, + line=1, + ),), + ) + + def test_tokenizes_greater_than_or_equal_operator(self): + self.assertEqual( + tokenize('>='), + (Token( + type='comparison_level_operator', + match='>=', + index=0, + line=1, + ),), + ) + + def test_tokenizes_less_than_or_equal_operator(self): + self.assertEqual( + tokenize('<='), + (Token( + type='comparison_level_operator', + match='<=', + index=0, + line=1, + ),), + ) + + def test_tokenizes_greater_than_equal_operator(self): + self.assertEqual( + tokenize('>'), + (Token( + type='comparison_level_operator', + match='>', + index=0, + line=1, + ),), + ) + + def test_tokenizes_less_than_equal_operator(self): + self.assertEqual( + tokenize('<'), + (Token( + type='comparison_level_operator', + match='<', + index=0, + line=1, + ),), + ) + + def test_tokenizes_not_equal_operator(self): + self.assertEqual( + tokenize('!='), + (Token( + type='comparison_level_operator', + match='!=', + index=0, + line=1, + ),), + ) + + def test_tokenizes_newline(self): + self.assertEqual( + tokenize('\n'), + (Token( + type='newline', + match='\n', index=0, line=1, ),), @@ -218,6 +299,12 @@ if __name__ == '__main__': index=0, line=1, ), + Token( + type='newline', + match='\n', + index=5, + line=1, + ), Token( type='open_parenthese', match='(',