+# A lexed token: `type` is the matcher name, `match` is the matched source
+# text, and `metadata` is a NodeMetadata giving where the token was found.
Token = collections.namedtuple(
'Token',
- [
+ (
'type',
'match',
+ 'metadata',
+ ),
+)
+
+# Position information attached to a Token: `index` is the offset into the
+# source string at which the match started, `line` is the line counter value
+# at that point (presumably 1-based; confirm against tokenize's initial value).
+NodeMetadata = collections.namedtuple(
+ 'NodeMetadata',
+ (
'index',
'line',
- ],
+ ),
)
def _make_token_matcher(definition):
return (
True,
index + len(match.group()),
- Token(type=name, match=match.group(), index=index, line=line),
+ Token(
+ type=name,
+ match=match.group(),
+ metadata=NodeMetadata(
+ index=index,
+ line=line,
+ ),
+ ),
)
return token_matcher
-
+# (token type name, regex) pairs. Each pair is subsequently converted into a
+# matcher function via _make_token_matcher. NOTE(review): the tokenize loop
+# appears to stop at the first matcher that succeeds, so list order is
+# presumably significant -- confirm before reordering entries.
_TOKEN_MATCHERS = [
+ # The negative lookahead stops a keyword from matching when it is only the
+ # prefix of a longer identifier (e.g. `iffy`, `end_time`).
+ ('keyword', r'(def|do|else|end|if|lambda)(?![a-z_])'),
+ ('open_bracket', r'\['),
+ ('close_bracket', r'\]'),
('open_parenthese', r'\('),
('close_parenthese', r'\)'),
('comma', r','),
+ ('colon', r':'),
+ ('period', r'\.'),
('integer_literal', r'\d+'),
- ('symbol', r'[a-z]+'),
+ ('symbol', r'[a-z_]+'),
+ # Non-greedy body: the literal ends at the first closing quote, so these
+ # patterns have no provision for escaped quotes inside a string.
('single_quoted_string_literal', r"'.*?'"),
- ('equality_level_operator', r'(<=|>=|==|!=|<|>)'),
- ('addition_level_operator', r'(\+|-)'),
- ('multiplication_level_operator', r'(\*|//|%)'),
+ ('double_quoted_string_literal', r'".*?"'),
+ # Two-character operators come first in the alternation so that `<=`, `>=`
+ # are not consumed as `<` / `>`. Listed ahead of 'assignment_operator',
+ # presumably so `==` is not read as two `=` tokens.
+ ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'),
('assignment_operator', r'='),
+ # `\+\+` precedes `\+` in the alternation so `++` is matched as one token.
+ ('addition_level_operator', r'(\+\+|\+|-)'),
+ ('multiplication_level_operator', r'(\*|//|%)'),
+ ('newline', r'\n'),
]
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
index += 1
continue
+ if source[index] == '#':
+ while index < len(source) and source[index] != '\n':
+ index += 1
+
+ continue
+
success = False
for matcher in _TOKEN_MATCHERS:
break
if not success:
- raise Exception('Unexpected character "{}"'.format(source[index]))
+ raise Exception('Unexpected character "{}" on line {}'.format(
+ source[index],
+ line,
+ ))
- while index < len(source) and source[index] in set(['\n']):
+ if token.type == 'newline':
line += 1
- index += 1
if __name__ == '__main__':
import unittest
self.assertEqual(
tokenize('=='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='==',
index=0,
line=1,
self.assertEqual(
tokenize('>='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='>=',
index=0,
line=1,
self.assertEqual(
tokenize('<='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='<=',
index=0,
line=1,
self.assertEqual(
tokenize('>'),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='>',
index=0,
line=1,
self.assertEqual(
tokenize('<'),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='<',
index=0,
line=1,
self.assertEqual(
tokenize('!='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='!=',
index=0,
line=1,
),),
)
- def test_handles_trailing_newline(self):
+ def test_tokenizes_newline(self):
self.assertEqual(
- tokenize('print\n'),
+ tokenize('\n'),
(Token(
- type='symbol',
- match='print',
+ type='newline',
+ match='\n',
index=0,
line=1,
),),
index=0,
line=1,
),
+ Token(
+ type='newline',
+ match='\n',
+ index=5,
+ line=1,
+ ),
Token(
type='open_parenthese',
match='(',