projects
/
fur
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Set the environment to the same variable name
[fur]
/
tokenization.py
diff --git
a/tokenization.py
b/tokenization.py
index
ff79307
..
bc8cf85
100644
(file)
--- a/
tokenization.py
+++ b/
tokenization.py
@@
-31,18
+31,24
@@
def _make_token_matcher(definition):
return token_matcher
return token_matcher
-
_TOKEN_MATCHERS = [
_TOKEN_MATCHERS = [
+ ('keyword', r'(def|do|else|end|if)(?![a-z_])'),
+ ('open_bracket', r'\['),
+ ('close_bracket', r'\]'),
('open_parenthese', r'\('),
('close_parenthese', r'\)'),
('comma', r','),
('open_parenthese', r'\('),
('close_parenthese', r'\)'),
('comma', r','),
+ ('colon', r':'),
+ ('period', r'\.'),
('integer_literal', r'\d+'),
('integer_literal', r'\d+'),
- ('symbol', r'[a-z]+'),
+ ('symbol', r'[a-z
_
]+'),
('single_quoted_string_literal', r"'.*?'"),
('single_quoted_string_literal', r"'.*?'"),
- ('equality_level_operator', r'(<=|>=|==|!=|<|>)'),
+ ('double_quoted_string_literal', r'".*?"'),
+ ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'),
('assignment_operator', r'='),
('assignment_operator', r'='),
- ('addition_level_operator', r'(\+|-)'),
+ ('addition_level_operator', r'(\+
\+|\+
|-)'),
('multiplication_level_operator', r'(\*|//|%)'),
('multiplication_level_operator', r'(\*|//|%)'),
+ ('newline', r'\n'),
]
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
]
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
@@
-57,6
+63,12
@@
def tokenize(source):
index += 1
continue
index += 1
continue
+ if source[index] == '#':
+ while index < len(source) and source[index] != '\n':
+ index += 1
+
+ continue
+
success = False
for matcher in _TOKEN_MATCHERS:
success = False
for matcher in _TOKEN_MATCHERS:
@@
-67,11
+79,13
@@
def tokenize(source):
break
if not success:
break
if not success:
- raise Exception('Unexpected character "{}"'.format(source[index]))
+ raise Exception('Unexpected character "{}" on line {}'.format(
+ source[index],
+ line,
+ ))
-
while index < len(source) and source[index] in set(['\n'])
:
+
if token.type == 'newline'
:
line += 1
line += 1
- index += 1
if __name__ == '__main__':
import unittest
if __name__ == '__main__':
import unittest
@@
-202,7
+216,7
@@
if __name__ == '__main__':
self.assertEqual(
tokenize('=='),
(Token(
self.assertEqual(
tokenize('=='),
(Token(
- type='
equality
_level_operator',
+ type='
comparison
_level_operator',
match='==',
index=0,
line=1,
match='==',
index=0,
line=1,
@@
-213,7
+227,7
@@
if __name__ == '__main__':
self.assertEqual(
tokenize('>='),
(Token(
self.assertEqual(
tokenize('>='),
(Token(
- type='
equality
_level_operator',
+ type='
comparison
_level_operator',
match='>=',
index=0,
line=1,
match='>=',
index=0,
line=1,
@@
-224,7
+238,7
@@
if __name__ == '__main__':
self.assertEqual(
tokenize('<='),
(Token(
self.assertEqual(
tokenize('<='),
(Token(
- type='
equality
_level_operator',
+ type='
comparison
_level_operator',
match='<=',
index=0,
line=1,
match='<=',
index=0,
line=1,
@@
-235,7
+249,7
@@
if __name__ == '__main__':
self.assertEqual(
tokenize('>'),
(Token(
self.assertEqual(
tokenize('>'),
(Token(
- type='
equality
_level_operator',
+ type='
comparison
_level_operator',
match='>',
index=0,
line=1,
match='>',
index=0,
line=1,
@@
-246,7
+260,7
@@
if __name__ == '__main__':
self.assertEqual(
tokenize('<'),
(Token(
self.assertEqual(
tokenize('<'),
(Token(
- type='
equality
_level_operator',
+ type='
comparison
_level_operator',
match='<',
index=0,
line=1,
match='<',
index=0,
line=1,
@@
-257,19
+271,19
@@
if __name__ == '__main__':
self.assertEqual(
tokenize('!='),
(Token(
self.assertEqual(
tokenize('!='),
(Token(
- type='
equality
_level_operator',
+ type='
comparison
_level_operator',
match='!=',
index=0,
line=1,
),),
)
match='!=',
index=0,
line=1,
),),
)
- def test_
handles_trailing
_newline(self):
+ def test_
tokenizes
_newline(self):
self.assertEqual(
self.assertEqual(
- tokenize('
print
\n'),
+ tokenize('\n'),
(Token(
(Token(
- type='
symbol
',
- match='
print
',
+ type='
newline
',
+ match='
\n
',
index=0,
line=1,
),),
index=0,
line=1,
),),
@@
-296,6
+310,12
@@
if __name__ == '__main__':
index=0,
line=1,
),
index=0,
line=1,
),
+ Token(
+ type='newline',
+ match='\n',
+ index=5,
+ line=1,
+ ),
Token(
type='open_parenthese',
match='(',
Token(
type='open_parenthese',
match='(',