6 Token = collections.namedtuple(
16 def _make_token_matcher(definition):
17 name, regex = definition
18 regex_matcher = re.compile(regex)
20 def token_matcher(index, source, line):
21 match = regex_matcher.match(source[index:])
24 return False, index, None
28 index + len(match.group()),
29 Token(type=name, match=match.group(), index=index, line=line),
35 ('keyword', r'(def|do|else|end|if)(?![a-z_])'),
36 ('open_bracket', r'\['),
37 ('close_bracket', r'\]'),
38 ('open_parenthese', r'\('),
39 ('close_parenthese', r'\)'),
43 ('integer_literal', r'\d+'),
44 ('symbol', r'[a-z_]+'),
45 ('single_quoted_string_literal', r"'.*?'"),
46 ('double_quoted_string_literal', r'".*?"'),
47 ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'),
48 ('assignment_operator', r'='),
49 ('addition_level_operator', r'(\+\+|\+|-)'),
50 ('multiplication_level_operator', r'(\*|//|%)'),
# Swap each (name, regex) definition for its compiled matcher function,
# keeping the order of the definitions.
_TOKEN_MATCHERS = [
    _make_token_matcher(definition) for definition in _TOKEN_MATCHERS
]
56 @util.force_generator(tuple)
61 while index < len(source):
62 if source[index] == ' ':
66 if source[index] == '#':
67 while index < len(source) and source[index] != '\n':
74 for matcher in _TOKEN_MATCHERS:
75 success, index, token = matcher(index, source, line)
82 raise Exception('Unexpected character "{}" on line {}'.format(
87 if token.type == 'newline':
90 if __name__ == '__main__':
93 class TokenizeTests(unittest.TestCase):
94 def test_tokenizes_open_parenthese(self):
98 type='open_parenthese',
105 def test_tokenizes_close_parenthese(self):
109 type='close_parenthese',
116 def test_tokenizes_symbol(self):
127 def test_tokenizes_single_quoted_string_literal(self):
129 tokenize("'Hello, world'"),
131 type='single_quoted_string_literal',
132 match="'Hello, world'",
138 def test_tokenizes_plus(self):
142 type='addition_level_operator',
149 def test_tokenizes_minus(self):
153 type='addition_level_operator',
160 def test_tokenizes_times(self):
164 type='multiplication_level_operator',
171 def test_tokenizes_integer_divide(self):
175 type='multiplication_level_operator',
182 def test_tokenizes_modular_divide(self):
186 type='multiplication_level_operator',
193 def test_tokenizes_comma(self):
204 def test_tokenizes_assignment_operator(self):
208 type='assignment_operator',
215 def test_tokenizes_equality_operator(self):
219 type='comparison_level_operator',
226 def test_tokenizes_greater_than_or_equal_operator(self):
230 type='comparison_level_operator',
237 def test_tokenizes_less_than_or_equal_operator(self):
241 type='comparison_level_operator',
248 def test_tokenizes_greater_than_equal_operator(self):
252 type='comparison_level_operator',
259 def test_tokenizes_less_than_equal_operator(self):
263 type='comparison_level_operator',
270 def test_tokenizes_not_equal_operator(self):
274 type='comparison_level_operator',
281 def test_tokenizes_newline(self):
292 def test_handles_leading_space(self):
303 def test_tokenizes_with_proper_line_numbers(self):
305 tokenize('print\n('),
320 type='open_parenthese',