# A single lexical token: the token-category name, the exact matched text,
# the character offset where it starts, and the 1-based source line number.
# (Field set is fixed by the Token(type=..., match=..., index=..., line=...)
# construction inside _make_token_matcher.)
Token = collections.namedtuple(
    'Token',
    (
        'type',
        'match',
        'index',
        'line',
    ),
)
16 def _make_token_matcher(definition):
17 name, regex = definition
18 regex_matcher = re.compile(regex)
20 def token_matcher(index, source, line):
21 match = regex_matcher.match(source[index:])
24 return False, index, None
28 index + len(match.group()),
29 Token(type=name, match=match.group(), index=index, line=line),
# Ordered (name, regex) definitions for every token the language recognizes.
# Order matters: the first matcher that succeeds at a position wins, so e.g.
# 'integer_literal' is tried before 'symbol'.
_TOKEN_MATCHERS = [
    ('open_parenthese', r'\('),
    ('close_parenthese', r'\)'),
    ('comma', r','),
    ('assignment_operator', r'='),
    ('integer_literal', r'\d+'),
    ('symbol', r'[a-z]+'),
    ('single_quoted_string_literal', r"'.*?'"),
    ('addition_level_operator', r'(\+|-)'),
    ('multiplication_level_operator', r'(\*|//|%)'),
]

# Replace the raw (name, regex) pairs with compiled matcher closures.
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
@util.force_generator(tuple)
def tokenize(source):
    """Yield a Token for each lexeme in *source*, start to end.

    Single spaces are skipped without producing a token; newlines after a
    token advance the line counter.  The decorator collects the yielded
    tokens — presumably into a tuple, consistent with the tests comparing
    the result against tuples (TODO confirm against util.force_generator).

    Raises:
        Exception: on any character no matcher recognizes.
    """
    index = 0
    line = 1

    while index < len(source):
        # Skip an inter-token space and retry from the next character.
        if source[index] == ' ':
            index += 1
            continue

        success = False

        for matcher in _TOKEN_MATCHERS:
            success, index, token = matcher(index, source, line)

            if success:
                yield token
                break

        if not success:
            raise Exception('Unexpected character "{}"'.format(source[index]))

        # Consume newline(s) following the token, bumping the line number
        # once per newline so later tokens report the correct line.
        while index < len(source) and source[index] in set(['\n']):
            index += 1
            line += 1
if __name__ == '__main__':
    # Self-test harness: run this module directly to exercise the tokenizer.
    import unittest

    class TokenizeTests(unittest.TestCase):
        def test_tokenizes_open_parenthese(self):
            self.assertEqual(
                tokenize('('),
                (Token(
                    type='open_parenthese',
                    match='(',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_close_parenthese(self):
            self.assertEqual(
                tokenize(')'),
                (Token(
                    type='close_parenthese',
                    match=')',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_symbol(self):
            self.assertEqual(
                tokenize('print'),
                (Token(
                    type='symbol',
                    match='print',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_single_quoted_string_literal(self):
            self.assertEqual(
                tokenize("'Hello, world'"),
                (Token(
                    type='single_quoted_string_literal',
                    match="'Hello, world'",
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_plus(self):
            self.assertEqual(
                tokenize('+'),
                (Token(
                    type='addition_level_operator',
                    match='+',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_minus(self):
            self.assertEqual(
                tokenize('-'),
                (Token(
                    type='addition_level_operator',
                    match='-',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_times(self):
            self.assertEqual(
                tokenize('*'),
                (Token(
                    type='multiplication_level_operator',
                    match='*',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_integer_divide(self):
            self.assertEqual(
                tokenize('//'),
                (Token(
                    type='multiplication_level_operator',
                    match='//',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_modular_divide(self):
            self.assertEqual(
                tokenize('%'),
                (Token(
                    type='multiplication_level_operator',
                    match='%',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_comma(self):
            self.assertEqual(
                tokenize(','),
                (Token(
                    type='comma',
                    match=',',
                    index=0,
                    line=1,
                ),),
            )

        def test_tokenizes_assignment_operator(self):
            self.assertEqual(
                tokenize('='),
                (Token(
                    type='assignment_operator',
                    match='=',
                    index=0,
                    line=1,
                ),),
            )

        def test_handles_trailing_newline(self):
            # The trailing '\n' is consumed by the newline loop and must not
            # produce a token of its own.
            self.assertEqual(
                tokenize('print\n'),
                (Token(
                    type='symbol',
                    match='print',
                    index=0,
                    line=1,
                ),),
            )

        def test_handles_leading_space(self):
            # The skipped space still counts toward the character index.
            self.assertEqual(
                tokenize(' print'),
                (Token(
                    type='symbol',
                    match='print',
                    index=1,
                    line=1,
                ),),
            )

        def test_tokenizes_with_proper_line_numbers(self):
            # '(' sits after 'print\n': index 6, and the newline bumps the
            # line counter to 2.
            self.assertEqual(
                tokenize('print\n('),
                (
                    Token(
                        type='symbol',
                        match='print',
                        index=0,
                        line=1,
                    ),
                    Token(
                        type='open_parenthese',
                        match='(',
                        index=6,
                        line=2,
                    ),
                ),
            )

    unittest.main()