X-Git-Url: https://code.kerkeslager.com/?p=fur;a=blobdiff_plain;f=tokenization.py;fp=tokenization.py;h=0421b84592fa58280c8f84c105870fd197f65c0e;hp=7733ab7d21cde5c5cbcc58b3312849656eb0cd94;hb=fd918259dd949c8fababcf49ced426ab3c39da38;hpb=db1651d2c0e44a380f876b452f30c2244d3a0d06 diff --git a/tokenization.py b/tokenization.py index 7733ab7..0421b84 100644 --- a/tokenization.py +++ b/tokenization.py @@ -8,6 +8,7 @@ Token = collections.namedtuple( [ 'type', 'match', + 'index', ], ) @@ -21,7 +22,11 @@ def _make_token_matcher(definition): if match is None: return False, index, None - return True, index + len(match.group()), Token(type=name, match=match.group()) + return ( + True, + index + len(match.group()), + Token(type=name, match=match.group(), index=index), + ) return token_matcher @@ -29,9 +34,11 @@ def _make_token_matcher(definition): _TOKEN_MATCHERS = [ ('open_parenthese', r'\('), ('close_parenthese', r'\)'), - ('integer_literal', r'-?\s*\d+'), + ('integer_literal', r'\d+'), ('symbol', r'[a-z]+'), ('single_quoted_string_literal', r"'.*?'"), + ('addition_level_operator', r'(\+|-)'), + ('multiplication_level_operator', r'(\*|//|%)'), ] _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS)) @@ -41,6 +48,10 @@ def tokenize(source): index = 0 while index < len(source): + if source[index] == ' ': + index += 1 + continue + success = False for matcher in _TOKEN_MATCHERS: @@ -63,46 +74,111 @@ if __name__ == '__main__': def test_tokenizes_open_parenthese(self): self.assertEqual( tokenize('('), - [Token( + (Token( type='open_parenthese', match='(', - )], + index=0, + ),), ) def test_tokenizes_close_parenthese(self): self.assertEqual( tokenize(')'), - [Token( + (Token( type='close_parenthese', match=')', - )], + index=0, + ),), ) def test_tokenizes_symbol(self): self.assertEqual( tokenize('print'), - [Token( + (Token( type='symbol', match='print', - )], + index=0, + ),), ) def test_tokenizes_single_quoted_string_literal(self): self.assertEqual( tokenize("'Hello, world'"), - [Token( + (Token( type='single_quoted_string_literal', match="'Hello, world'", - )], + index=0, + ),), + ) + + def test_tokenizes_plus(self): + self.assertEqual( + tokenize('+'), + (Token( + type='addition_level_operator', + match='+', + index=0, + ),), + ) + + def test_tokenizes_minus(self): + self.assertEqual( + tokenize('-'), + (Token( + type='addition_level_operator', + match='-', + index=0, + ),), + ) + + def test_tokenizes_times(self): + self.assertEqual( + tokenize('*'), + (Token( + type='multiplication_level_operator', + match='*', + index=0, + ),), + ) + + def test_tokenizes_integer_divide(self): + self.assertEqual( + tokenize('//'), + (Token( + type='multiplication_level_operator', + match='//', + index=0, + ),), + ) + + def test_tokenizes_modular_divide(self): + self.assertEqual( + tokenize('%'), + (Token( + type='multiplication_level_operator', + match='%', + index=0, + ),), ) def test_handles_trailing_newline(self): self.assertEqual( tokenize('print\n'), - [Token( + (Token( + type='symbol', + match='print', + index=0, + ),), + ) + + def test_handles_leading_space(self): + self.assertEqual( + tokenize(' print'), + (Token( type='symbol', match='print', - )], + index=1, + ),), ) unittest.main()