# A single lexical token: the token-type name, the exact text matched,
# and the index in the source string where the match began.
# (Field set recovered from the keyword construction in token_matcher:
# Token(type=..., match=..., index=...).)
Token = collections.namedtuple(
    'Token',
    [
        'type',
        'match',
        'index',
    ],
)
15 def _make_token_matcher(definition):
16 name, regex = definition
17 regex_matcher = re.compile(regex)
19 def token_matcher(index, source):
20 match = regex_matcher.match(source[index:])
23 return False, index, None
27 index + len(match.group()),
28 Token(type=name, match=match.group(), index=index),
# Token definitions as (name, regex) pairs. Order matters: matchers are
# tried first-to-last, so e.g. 'integer_literal' wins before 'symbol'
# would get a chance to (not) match a digit.
_TOKEN_MATCHERS = [
    ('open_parenthese', r'\('),
    ('close_parenthese', r'\)'),
    ('integer_literal', r'\d+'),
    ('symbol', r'[a-z]+'),
    ('single_quoted_string_literal', r"'.*?'"),
    ('addition_level_operator', r'(\+|-)'),
    ('multiplication_level_operator', r'(\*|//|%)'),
]

# Rebind the name to the compiled matcher callables, built once at import.
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
@util.force_generator(tuple)
def tokenize(source):
    """Yield Tokens for *source*, left to right.

    Spaces between tokens are skipped; newlines immediately following a
    token are consumed. Raises Exception on any character no matcher
    accepts. (The decorator presumably materializes the generator into a
    tuple — defined in the project-local ``util`` module; confirm there.)
    """
    index = 0

    while index < len(source):
        # Skip a space separating tokens.
        if source[index] == ' ':
            index += 1
            continue

        success = False

        # Try each matcher in definition order; the first success wins.
        for matcher in _TOKEN_MATCHERS:
            success, index, token = matcher(index, source)

            if success:
                yield token
                break

        if not success:
            raise Exception('Unexpected character "{}"'.format(source[index]))

        # Consume any newlines trailing the token just emitted.
        while index < len(source) and source[index] in set(['\n']):
            index += 1
70 if __name__ == '__main__':
73 class TokenizeTests(unittest.TestCase):
74 def test_tokenizes_open_parenthese(self):
78 type='open_parenthese',
84 def test_tokenizes_close_parenthese(self):
88 type='close_parenthese',
94 def test_tokenizes_symbol(self):
104 def test_tokenizes_single_quoted_string_literal(self):
106 tokenize("'Hello, world'"),
108 type='single_quoted_string_literal',
109 match="'Hello, world'",
114 def test_tokenizes_plus(self):
118 type='addition_level_operator',
124 def test_tokenizes_minus(self):
128 type='addition_level_operator',
134 def test_tokenizes_times(self):
138 type='multiplication_level_operator',
144 def test_tokenizes_integer_divide(self):
148 type='multiplication_level_operator',
154 def test_tokenizes_modular_divide(self):
158 type='multiplication_level_operator',
164 def test_handles_trailing_newline(self):
174 def test_handles_leading_space(self):