Implement negatives; extract token position info into a NodeMetadata namedtuple
[fur] / tokenization.py
index 3131c35..819a0de 100644 (file)
@@ -5,12 +5,19 @@ import util
 
 Token = collections.namedtuple(
     'Token',
-    [
+    (
         'type',
         'match',
+        'metadata',
+    ),
+)
+
+NodeMetadata = collections.namedtuple(
+    'NodeMetadata',
+    (
         'index',
         'line',
-    ],
+    ),
 )
 
 def _make_token_matcher(definition):
@@ -26,22 +33,34 @@ def _make_token_matcher(definition):
         return (
             True,
             index + len(match.group()),
-            Token(type=name, match=match.group(), index=index, line=line),
+            Token(
+                type=name,
+                match=match.group(),
+                metadata=NodeMetadata(
+                    index=index,
+                    line=line,
+                ),
+            ),
         )
 
     return token_matcher
 
-
 _TOKEN_MATCHERS = [
+    ('keyword',                         r'(def|do|else|end|if|lambda)(?![a-z_])'),
+    ('open_bracket',                    r'\['),
+    ('close_bracket',                   r'\]'),
     ('open_parenthese',                 r'\('),
     ('close_parenthese',                r'\)'),
     ('comma',                           r','),
+    ('colon',                           r':'),
+    ('period',                          r'\.'),
     ('integer_literal',                 r'\d+'),
-    ('symbol',                          r'[a-z]+'),
+    ('symbol',                          r'[a-z_]+'),
     ('single_quoted_string_literal',    r"'.*?'"),
-    ('equality_level_operator',         r'(<=|>=|==|!=|<|>)'),
+    ('double_quoted_string_literal',    r'".*?"'),
+    ('comparison_level_operator',       r'(<=|>=|==|!=|<|>)'),
     ('assignment_operator',             r'='),
-    ('addition_level_operator',         r'(\+|-)'),
+    ('addition_level_operator',         r'(\+\+|\+|-)'),
     ('multiplication_level_operator',   r'(\*|//|%)'),
     ('newline',                         r'\n'),
 ]
@@ -58,6 +77,12 @@ def tokenize(source):
             index += 1
             continue
 
+        if source[index] == '#':
+            while index < len(source) and source[index] != '\n':
+                index += 1
+
+            continue
+
         success = False
 
         for matcher in _TOKEN_MATCHERS:
@@ -205,7 +230,9 @@ if __name__ == '__main__':
             self.assertEqual(
                 tokenize('=='),
                 (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                     match='==',
-                    index=0,
-                    line=1,
+                    metadata=NodeMetadata(
+                        index=0,
+                        line=1,
+                    ),
@@ -216,7 +241,9 @@ if __name__ == '__main__':
             self.assertEqual(
                 tokenize('>='),
                 (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                     match='>=',
-                    index=0,
-                    line=1,
+                    metadata=NodeMetadata(
+                        index=0,
+                        line=1,
+                    ),
@@ -227,7 +252,9 @@ if __name__ == '__main__':
             self.assertEqual(
                 tokenize('<='),
                 (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                     match='<=',
-                    index=0,
-                    line=1,
+                    metadata=NodeMetadata(
+                        index=0,
+                        line=1,
+                    ),
@@ -238,7 +263,9 @@ if __name__ == '__main__':
             self.assertEqual(
                 tokenize('>'),
                 (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                     match='>',
-                    index=0,
-                    line=1,
+                    metadata=NodeMetadata(
+                        index=0,
+                        line=1,
+                    ),
@@ -249,7 +274,9 @@ if __name__ == '__main__':
             self.assertEqual(
                 tokenize('<'),
                 (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                     match='<',
-                    index=0,
-                    line=1,
+                    metadata=NodeMetadata(
+                        index=0,
+                        line=1,
+                    ),
@@ -260,7 +285,9 @@ if __name__ == '__main__':
             self.assertEqual(
                 tokenize('!='),
                 (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                     match='!=',
-                    index=0,
-                    line=1,
+                    metadata=NodeMetadata(
+                        index=0,
+                        line=1,
+                    ),