return token_matcher
-
# Token definitions as (token type name, regex pattern) pairs. The list is
# rebound right after the literal to the result of mapping
# _make_token_matcher over each pair.
#
# NOTE(review): this chunk is in diff form -- entries prefixed with "+" are
# added by the change and entries prefixed with "-" are removed by it.
#
# The 'keyword' pattern ends with a negative lookahead, (?![a-z_]), so a
# keyword directly followed by a lowercase letter or underscore is not
# matched by that entry.
#
# NOTE(review): entry order looks significant. The tokenizer loop iterates
# this list and breaks out of it, presumably on the first successful match,
# which would explain why 'keyword' sits ahead of 'symbol' and why the
# comparison operators sit ahead of the bare '=' assignment operator.
# Confirm against the loop body before reordering anything.
_TOKEN_MATCHERS = [
+ ('keyword', r'(def|do|else|end|if)(?![a-z_])'),
('open_parenthese', r'\('),
('close_parenthese', r'\)'),
('comma', r','),
('integer_literal', r'\d+'),
- ('symbol', r'[a-z]+'),
+ ('symbol', r'[a-z_]+'),
('single_quoted_string_literal', r"'.*?'"),
- ('equality_level_operator', r'(<=|>=|==|!=|<|>)'),
+ ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'),
+ ('assignment_operator', r'='),
('addition_level_operator', r'(\+|-)'),
('multiplication_level_operator', r'(\*|//|%)'),
- ('assignment_operator', r'='),
+ ('newline', r'\n'),
]
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
index += 1
continue
+ if source[index] == '#':
+ while index < len(source) and source[index] != '\n':
+ index += 1
+
+ continue
+
success = False
for matcher in _TOKEN_MATCHERS:
break
if not success:
- raise Exception('Unexpected character "{}"'.format(source[index]))
+ raise Exception('Unexpected character "{}" on line {}'.format(
+ source[index],
+ line,
+ ))
- while index < len(source) and source[index] in set(['\n']):
+ if token.type == 'newline':
line += 1
- index += 1
if __name__ == '__main__':
import unittest
self.assertEqual(
tokenize('=='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='==',
index=0,
line=1,
self.assertEqual(
tokenize('>='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='>=',
index=0,
line=1,
self.assertEqual(
tokenize('<='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='<=',
index=0,
line=1,
self.assertEqual(
tokenize('>'),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='>',
index=0,
line=1,
self.assertEqual(
tokenize('<'),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='<',
index=0,
line=1,
self.assertEqual(
tokenize('!='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='!=',
index=0,
line=1,
),),
)
- def test_handles_trailing_newline(self):
+ def test_tokenizes_newline(self):
self.assertEqual(
- tokenize('print\n'),
+ tokenize('\n'),
(Token(
- type='symbol',
- match='print',
+ type='newline',
+ match='\n',
index=0,
line=1,
),),
index=0,
line=1,
),
+ Token(
+ type='newline',
+ match='\n',
+ index=5,
+ line=1,
+ ),
Token(
type='open_parenthese',
match='(',