# A single lexical token: `type` is the matcher name (e.g. 'symbol',
# 'integer_literal'), `match` is the exact source text it consumed.
# (Field set reconstructed from the Token(type=..., match=...) call site
# in _make_token_matcher — the original statement was truncated.)
Token = collections.namedtuple('Token', ['type', 'match'])
14 def _make_token_matcher(definition):
15 name, regex = definition
16 regex_matcher = re.compile(regex)
18 def token_matcher(index, source):
19 match = regex_matcher.match(source[index:])
22 return False, index, None
24 return True, index + len(match.group()), Token(type=name, match=match.group())
# Ordered token definitions: each entry is (token_type, regex). Order
# matters — the tokenizer tries them first-to-last and takes the first hit.
# (The opening of this list literal was elided in the mangled source and is
# restored here; the entries themselves are copied verbatim.)
_TOKEN_MATCHERS = [
    ('open_parenthese', r'\('),
    ('close_parenthese', r'\)'),
    ('integer_literal', r'-?\s*\d+'),
    ('symbol', r'[a-z]+'),
    ('single_quoted_string_literal', r"'.*?'"),
]

# Compile each (name, regex) pair into a matcher callable once, at import time.
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
def tokenize(source):
    """Convert *source* into a list of Token namedtuples.

    Each position is offered to every matcher in ``_TOKEN_MATCHERS`` in
    order; the first that succeeds emits a token and advances the index.
    If no matcher succeeds, an ``Exception`` naming the offending character
    is raised. Newlines following a token are skipped, so a trailing
    newline produces no token.

    NOTE(review): the function header, accumulator setup, token append and
    final return were elided in the mangled source and are reconstructed
    here from the visible loop skeleton — confirm against the original file.
    """
    tokens = []
    index = 0

    while index < len(source):
        success = False

        for matcher in _TOKEN_MATCHERS:
            success, index, token = matcher(index, source)

            if success:
                tokens.append(token)
                break

        if not success:
            raise Exception('Unexpected character "{}"'.format(source[index]))

        # Consume any newline(s) directly after a token (handles a trailing
        # newline at end-of-input without emitting a bogus token).
        while index < len(source) and source[index] in set(['\n']):
            index += 1

    return tokens
59 if __name__ == '__main__':
62 class TokenizeTests(unittest.TestCase):
63 def test_tokenizes_open_parenthese(self):
67 type='open_parenthese',
72 def test_tokenizes_close_parenthese(self):
76 type='close_parenthese',
81 def test_tokenizes_symbol(self):
90 def test_tokenizes_single_quoted_string_literal(self):
92 tokenize("'Hello, world'"),
94 type='single_quoted_string_literal',
95 match="'Hello, world'",
99 def test_handles_trailing_newline(self):