projects
/
fur
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Implement negatives, use typedef
[fur]
/
tokenization.py
diff --git a/tokenization.py b/tokenization.py
index ff79307..819a0de 100644 (file)
--- a/tokenization.py
+++ b/tokenization.py
@@ -5,12 +5,19 @@ import util
Token = collections.namedtuple(
'Token',
Token = collections.namedtuple(
'Token',
- [
+ (
'type',
'match',
'type',
'match',
+ 'metadata',
+ ),
+)
+
+NodeMetadata = collections.namedtuple(
+ 'NodeMetadata',
+ (
'index',
'line',
'index',
'line',
- ],
+ ),
)
def _make_token_matcher(definition):
)
def _make_token_matcher(definition):
@@ -26,23 +33,36 @@ def _make_token_matcher(definition):
return (
True,
index + len(match.group()),
return (
True,
index + len(match.group()),
- Token(type=name, match=match.group(), index=index, line=line),
+ Token(
+ type=name,
+ match=match.group(),
+ metadata=NodeMetadata(
+ index=index,
+ line=line,
+ ),
+ ),
)
return token_matcher
)
return token_matcher
-
_TOKEN_MATCHERS = [
_TOKEN_MATCHERS = [
+ ('keyword', r'(def|do|else|end|if|lambda)(?![a-z_])'),
+ ('open_bracket', r'\['),
+ ('close_bracket', r'\]'),
('open_parenthese', r'\('),
('close_parenthese', r'\)'),
('comma', r','),
('open_parenthese', r'\('),
('close_parenthese', r'\)'),
('comma', r','),
+ ('colon', r':'),
+ ('period', r'\.'),
('integer_literal', r'\d+'),
('integer_literal', r'\d+'),
- ('symbol', r'[a-z]+'),
+ ('symbol', r'[a-z_]+'),
('single_quoted_string_literal', r"'.*?'"),
('single_quoted_string_literal', r"'.*?'"),
- ('equality_level_operator', r'(<=|>=|==|!=|<|>)'),
+ ('double_quoted_string_literal', r'".*?"'),
+ ('comparison_level_operator', r'(<=|>=|==|!=|<|>)'),
('assignment_operator', r'='),
('assignment_operator', r'='),
- ('addition_level_operator', r'(\+|-)'),
+ ('addition_level_operator', r'(\+\+|\+|-)'),
('multiplication_level_operator', r'(\*|//|%)'),
('multiplication_level_operator', r'(\*|//|%)'),
+ ('newline', r'\n'),
]
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
]
_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
@@ -57,6 +77,12 @@ def tokenize(source):
index += 1
continue
index += 1
continue
+ if source[index] == '#':
+ while index < len(source) and source[index] != '\n':
+ index += 1
+
+ continue
+
success = False
for matcher in _TOKEN_MATCHERS:
success = False
for matcher in _TOKEN_MATCHERS:
@@ -67,11 +93,13 @@ def tokenize(source):
break
if not success:
break
if not success:
- raise Exception('Unexpected character "{}"'.format(source[index]))
+ raise Exception('Unexpected character "{}" on line {}'.format(
+ source[index],
+ line,
+ ))
- while index < len(source) and source[index] in set(['\n']):
+ if token.type == 'newline':
line += 1
line += 1
- index += 1
if __name__ == '__main__':
import unittest
if __name__ == '__main__':
import unittest
@@ -202,7 +230,7 @@ if __name__ == '__main__':
self.assertEqual(
tokenize('=='),
(Token(
self.assertEqual(
tokenize('=='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='==',
index=0,
line=1,
match='==',
index=0,
line=1,
@@ -213,7 +241,7 @@ if __name__ == '__main__':
self.assertEqual(
tokenize('>='),
(Token(
self.assertEqual(
tokenize('>='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='>=',
index=0,
line=1,
match='>=',
index=0,
line=1,
@@ -224,7 +252,7 @@ if __name__ == '__main__':
self.assertEqual(
tokenize('<='),
(Token(
self.assertEqual(
tokenize('<='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='<=',
index=0,
line=1,
match='<=',
index=0,
line=1,
@@ -235,7 +263,7 @@ if __name__ == '__main__':
self.assertEqual(
tokenize('>'),
(Token(
self.assertEqual(
tokenize('>'),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='>',
index=0,
line=1,
match='>',
index=0,
line=1,
@@ -246,7 +274,7 @@ if __name__ == '__main__':
self.assertEqual(
tokenize('<'),
(Token(
self.assertEqual(
tokenize('<'),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='<',
index=0,
line=1,
match='<',
index=0,
line=1,
@@ -257,19 +285,19 @@ if __name__ == '__main__':
self.assertEqual(
tokenize('!='),
(Token(
self.assertEqual(
tokenize('!='),
(Token(
- type='equality_level_operator',
+ type='comparison_level_operator',
match='!=',
index=0,
line=1,
),),
)
match='!=',
index=0,
line=1,
),),
)
- def test_handles_trailing_newline(self):
+ def test_tokenizes_newline(self):
self.assertEqual(
self.assertEqual(
- tokenize('print\n'),
+ tokenize('\n'),
(Token(
(Token(
- type='symbol',
- match='print',
+ type='newline',
+ match='\n',
index=0,
line=1,
),),
index=0,
line=1,
),),
@@ -296,6 +324,12 @@ if __name__ == '__main__':
index=0,
line=1,
),
index=0,
line=1,
),
+ Token(
+ type='newline',
+ match='\n',
+ index=5,
+ line=1,
+ ),
Token(
type='open_parenthese',
match='(',
Token(
type='open_parenthese',
match='(',