6 Token = collections.namedtuple(
15 NodeMetadata = collections.namedtuple(
23 def _make_token_matcher(definition):
24 name, regex = definition
25 regex_matcher = re.compile(regex)
27 def token_matcher(index, source, line):
28 match = regex_matcher.match(source[index:])
31 return False, index, None
35 index + len(match.group()),
39 metadata=NodeMetadata(
49 ('keyword', r'(def|do|else|end|if|lambda)(?![a-z_])'),
50 ('open_bracket', r'\['),
51 ('close_bracket', r'\]'),
52 ('open_parenthese', r'\('),
53 ('close_parenthese', r'\)'),
57 ('integer_literal', r'\d+'),
58 ('symbol', r'[a-z_]+'),
59 ('single_quoted_string_literal', r"'.*?'"),
60 ('double_quoted_string_literal', r'".*?"'),
61 ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'),
62 ('assignment_operator', r'='),
63 ('addition_level_operator', r'(\+\+|\+|-)'),
64 ('multiplication_level_operator', r'(\*|//|%)'),
68 _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
70 @util.force_generator(tuple)
75 while index < len(source):
76 if source[index] == ' ':
80 if source[index] == '#':
81 while index < len(source) and source[index] != '\n':
88 for matcher in _TOKEN_MATCHERS:
89 success, index, token = matcher(index, source, line)
96 raise Exception('Unexpected character "{}" on line {}'.format(
101 if token.type == 'newline':
104 if __name__ == '__main__':
107 class TokenizeTests(unittest.TestCase):
108 def test_tokenizes_open_parenthese(self):
112 type='open_parenthese',
119 def test_tokenizes_close_parenthese(self):
123 type='close_parenthese',
130 def test_tokenizes_symbol(self):
141 def test_tokenizes_single_quoted_string_literal(self):
143 tokenize("'Hello, world'"),
145 type='single_quoted_string_literal',
146 match="'Hello, world'",
152 def test_tokenizes_plus(self):
156 type='addition_level_operator',
163 def test_tokenizes_minus(self):
167 type='addition_level_operator',
174 def test_tokenizes_times(self):
178 type='multiplication_level_operator',
185 def test_tokenizes_integer_divide(self):
189 type='multiplication_level_operator',
196 def test_tokenizes_modular_divide(self):
200 type='multiplication_level_operator',
207 def test_tokenizes_comma(self):
218 def test_tokenizes_assignment_operator(self):
222 type='assignment_operator',
229 def test_tokenizes_equality_operator(self):
233 type='comparison_level_operator',
240 def test_tokenizes_greater_than_or_equal_operator(self):
244 type='comparison_level_operator',
251 def test_tokenizes_less_than_or_equal_operator(self):
255 type='comparison_level_operator',
262 def test_tokenizes_greater_than_equal_operator(self):
266 type='comparison_level_operator',
273 def test_tokenizes_less_than_equal_operator(self):
277 type='comparison_level_operator',
284 def test_tokenizes_not_equal_operator(self):
288 type='comparison_level_operator',
295 def test_tokenizes_newline(self):
306 def test_handles_leading_space(self):
317 def test_tokenizes_with_proper_line_numbers(self):
319 tokenize('print\n('),
334 type='open_parenthese',