# A single lexical token: ``type`` is the matcher name (e.g. 'symbol') and
# ``match`` is the exact source text it consumed.
# NOTE(review): reconstructed from a garbled listing — the original line was
# truncated after the opening parenthesis. The field names are confirmed by
# the keyword arguments used elsewhere in this file (type=..., match=...);
# confirm the typename string against the original source.
Token = collections.namedtuple('Token', ['type', 'match'])
14 def _make_token_matcher(definition):
15 name, regex = definition
16 regex_matcher = re.compile(regex)
18 def token_matcher(index, source):
19 match = regex_matcher.match(source[index:])
22 return False, index, None
24 return True, index + len(match.group()), Token(type=name, match=match.group())
# Token definitions in match-priority order: at each position the first
# matcher that succeeds wins.
# NOTE(review): reconstructed from a fragmented listing (the list literal's
# brackets were dropped); verify that no token definitions were lost from
# the original table.
_TOKEN_MATCHERS = [
    ('open_parenthese', r'\('),
    ('close_parenthese', r'\)'),
    ('symbol', r'[a-z]+'),
    ('single_quoted_string_literal', r"'.*?'"),
]

# Replace the raw (name, regex) pairs with compiled matcher callables.
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
def tokenize(source):
    """Split ``source`` into a list of ``Token`` namedtuples.

    At each position every matcher in ``_TOKEN_MATCHERS`` is tried in
    order; the first token produced is kept, then any newline characters
    following it are skipped.

    Raises:
        Exception: when no matcher recognizes the character at the
            current position.

    NOTE(review): the ``def`` line, the success bookkeeping, the append,
    and the ``return`` were all missing from the fragmented listing; this
    reconstruction is implied by the visible loop, raise, and newline-skip
    lines and by the unit tests at the bottom of the file — confirm it
    against the original source.
    """
    tokens = []
    index = 0
    while index < len(source):
        success = False
        for matcher in _TOKEN_MATCHERS:
            success, index, token = matcher(index, source)
            if success:
                tokens.append(token)
                break
        if not success:
            raise Exception('Unexpected character "{}"'.format(source[index]))
        # Consume newline separators between tokens; index is left on the
        # first non-newline character.
        while index < len(source) and source[index] in set(['\n']):
            index += 1
    return tokens
58 if __name__ == '__main__':
61 class TokenizeTests(unittest.TestCase):
62 def test_tokenizes_open_parenthese(self):
66 type='open_parenthese',
71 def test_tokenizes_close_parenthese(self):
75 type='close_parenthese',
80 def test_tokenizes_symbol(self):
89 def test_tokenizes_single_quoted_string_literal(self):
91 tokenize("'Hello, world'"),
93 type='single_quoted_string_literal',
94 match="'Hello, world'",
98 def test_handles_trailing_newline(self):