# Record type for one lexed token.
# NOTE(review): the field list is not visible in this excerpt; the keyword
# call in token_matcher suggests at least type, match, index and line.
6 Token = collections.namedtuple(
# Build a matcher function for a single (name, regex) token definition.
# NOTE(review): several original lines of this function are not visible in
# this excerpt (the guard before the failure return, the opening of the
# success tuple, and the final return of the closure).
16 def _make_token_matcher(definition):
17 name, regex = definition
# Compile once per definition so the closure below reuses the pattern.
18 regex_matcher = re.compile(regex)
# Closure invoked with the current offset, the full source text and the
# current line number.
20 def token_matcher(index, source, line):
# Anchored match against the remainder of the source text.
# NOTE(review): the slice copies the rest of the source on every attempt;
# regex_matcher.match(source, index) would avoid that copy -- confirm no
# pattern depends on '^' or look-behind before changing it.
21 match = regex_matcher.match(source[index:])
# Failure result: flag False, offset unchanged, no token.
# Presumably taken when match is None -- the guard line is not visible.
24 return False, index, None
# Part of the success result: the new offset is advanced past the matched
# text ...
28 index + len(match.group()),
# ... and the token records the definition name, the matched text, the start
# offset and the line number passed in by the caller.
29 Token(type=name, match=match.group(), index=index, line=line),
# Token definitions as (name, regex) pairs.
# NOTE(review): the enclosing list's opening/closing lines and some entries
# are not visible in this excerpt. The scanning loop tries matchers in list
# order, so the order of these entries is significant.
# 'keyword' precedes 'symbol' so reserved words win; the negative lookahead
# stops a keyword from matching the prefix of a longer identifier.
35 ('keyword', r'(def|do|else|end|if)(?![a-z_])'),
36 ('open_bracket', r'\['),
37 ('close_bracket', r'\]'),
# NOTE(review): "parenthese" looks like a misspelling of "parenthesis", but
# it is a runtime token type that the tests compare against, so it must not
# be changed in isolation.
38 ('open_parenthese', r'\('),
39 ('close_parenthese', r'\)'),
41 ('integer_literal', r'\d+'),
# Symbols are limited to lowercase ASCII letters and underscores.
42 ('symbol', r'[a-z_]+'),
# Non-greedy string bodies: no escape sequences are recognised, and '.' does
# not match a newline unless a flag is passed, so literals are single-line.
43 ('single_quoted_string_literal', r"'.*?'"),
44 ('double_quoted_string_literal', r'".*?"'),
# Two-character operators come first in the alternation, and this entry
# precedes 'assignment_operator', so '==' is not lexed as two '=' tokens.
45 ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'),
46 ('assignment_operator', r'='),
# '++' is listed before '+' so the longer operator is preferred.
47 ('addition_level_operator', r'(\+\+|\+|-)'),
48 ('multiplication_level_operator', r'(\*|//|%)'),
# Rebind the name: each definition is replaced by its compiled matcher
# function, so after this line the list holds callables, not tuples.
52 _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
# Main scanning routine. The tests call it as tokenize(source).
# NOTE(review): the def line and most loop bodies are not visible in this
# excerpt, so the comments below cover only the visible statements.
# util.force_generator is a project helper; presumably it collects the
# yielded tokens into a tuple -- TODO confirm against util.
54 @util.force_generator(tuple)
# Scan until the offset reaches the end of the source text.
59 while index < len(source):
# A single space character is special-cased (handling not visible;
# presumably it is skipped). Only ' ' is tested here, not tabs.
60 if source[index] == ' ':
# '#' starts a comment ...
64 if source[index] == '#':
# ... which runs up to, but not including, the next newline or the end of
# the input.
65 while index < len(source) and source[index] != '\n':
# Try every matcher in list order. A failed matcher returns the offset
# unchanged, so rebinding index on each attempt is harmless.
72 for matcher in _TOKEN_MATCHERS:
73 success, index, token = matcher(index, source, line)
# Reports an unexpected character together with the line number.
# Presumably reached only when no matcher succeeds -- guard not visible.
80 raise Exception('Unexpected character "{}" on line {}'.format(
# Newline tokens are special-cased, presumably to advance the line counter.
# The 'newline' definition itself is not visible in this excerpt.
85 if token.type == 'newline':
# Inline unit tests, defined only when the module is run as a script.
# NOTE(review): most test bodies are not visible in this excerpt; only the
# test names and a few expected field values can be seen.
88 if __name__ == '__main__':
91 class TokenizeTests(unittest.TestCase):
92 def test_tokenizes_open_parenthese(self):
96 type='open_parenthese',
103 def test_tokenizes_close_parenthese(self):
107 type='close_parenthese',
114 def test_tokenizes_symbol(self):
125 def test_tokenizes_single_quoted_string_literal(self):
127 tokenize("'Hello, world'"),
129 type='single_quoted_string_literal',
# The expected match text keeps the surrounding quotes.
130 match="'Hello, world'",
136 def test_tokenizes_plus(self):
140 type='addition_level_operator',
147 def test_tokenizes_minus(self):
151 type='addition_level_operator',
158 def test_tokenizes_times(self):
162 type='multiplication_level_operator',
169 def test_tokenizes_integer_divide(self):
173 type='multiplication_level_operator',
180 def test_tokenizes_modular_divide(self):
184 type='multiplication_level_operator',
191 def test_tokenizes_comma(self):
202 def test_tokenizes_assignment_operator(self):
206 type='assignment_operator',
213 def test_tokenizes_equality_operator(self):
217 type='comparison_level_operator',
224 def test_tokenizes_greater_than_or_equal_operator(self):
228 type='comparison_level_operator',
235 def test_tokenizes_less_than_or_equal_operator(self):
239 type='comparison_level_operator',
# NOTE(review): the next two names nearly duplicate the two "_or_equal"
# tests above. They may be intended to cover plain '>' and '<' -- confirm
# the inputs and rename if so.
246 def test_tokenizes_greater_than_equal_operator(self):
250 type='comparison_level_operator',
257 def test_tokenizes_less_than_equal_operator(self):
261 type='comparison_level_operator',
268 def test_tokenizes_not_equal_operator(self):
272 type='comparison_level_operator',
279 def test_tokenizes_newline(self):
290 def test_handles_leading_space(self):
301 def test_tokenizes_with_proper_line_numbers(self):
# The input spans two lines, so the parenthesis token is on the second one.
303 tokenize('print\n('),
318 type='open_parenthese',