Add a very basic optimization pass

[fur] / tokenization.py
diff --git a/tokenization.py b/tokenization.py

index e6cad0a..819a0de 100644 (file)
--- a/tokenization.py
+++ b/tokenization.py
@@ -5,12 +5,19 @@ import util
  
  Token = collections.namedtuple(
      'Token',
-    [
+    (
          'type',
          'match',
+        'metadata',
+    ),
+)
+
+NodeMetadata = collections.namedtuple(
+    'NodeMetadata',
+    (
          'index',
          'line',
-    ],
+    ),
  )
  
  def _make_token_matcher(definition):
@@ -26,23 +33,36 @@ def _make_token_matcher(definition):
          return (
              True,
              index + len(match.group()),
-            Token(type=name, match=match.group(), index=index, line=line),
+            Token(
+                type=name,
+                match=match.group(),
+                metadata=NodeMetadata(
+                    index=index,
+                    line=line,
+                ),
+            ),
          )
  
      return token_matcher
  
-
  _TOKEN_MATCHERS = [
+    ('keyword',                         r'(def|do|else|end|if|lambda)(?![a-z_])'),
+    ('open_bracket',                    r'\['),
+    ('close_bracket',                   r'\]'),
      ('open_parenthese',                 r'\('),
      ('close_parenthese',                r'\)'),
      ('comma',                           r','),
+    ('colon',                           r':'),
+    ('period',                          r'\.'),
      ('integer_literal',                 r'\d+'),
-    ('symbol',                          r'[a-z]+'),
+    ('symbol',                          r'[a-z_]+'),
      ('single_quoted_string_literal',    r"'.*?'"),
-    ('equality_level_operator',         r'(<=|>=|==|!=|<|>)'),
-    ('addition_level_operator',         r'(\+|-)'),
-    ('multiplication_level_operator',   r'(\*|//|%)'),
+    ('double_quoted_string_literal',    r'".*?"'),
+    ('comparison_level_operator',       r'(<=|>=|==|!=|<|>)'),
      ('assignment_operator',             r'='),
+    ('addition_level_operator',         r'(\+\+|\+|-)'),
+    ('multiplication_level_operator',   r'(\*|//|%)'),
+    ('newline',                         r'\n'),
  ]
  
  _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
@@ -57,6 +77,12 @@ def tokenize(source):
              index += 1
              continue
  
+        if source[index] == '#':
+            while index < len(source) and source[index] != '\n':
+                index += 1
+
+            continue
+
          success = False
  
          for matcher in _TOKEN_MATCHERS:
@@ -67,11 +93,13 @@ def tokenize(source):
                  break
  
          if not success:
-            raise Exception('Unexpected character "{}"'.format(source[index]))
+            raise Exception('Unexpected character "{}" on line {}'.format(
+                source[index],
+                line,
+            ))
  
-        while index < len(source) and source[index] in set(['\n']):
+        if token.type == 'newline':
              line += 1
-            index += 1
  
  if __name__ == '__main__':
      import unittest
@@ -202,7 +230,7 @@ if __name__ == '__main__':
              self.assertEqual(
                  tokenize('=='),
                  (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                      match='==',
                      index=0,
                      line=1,
@@ -213,7 +241,7 @@ if __name__ == '__main__':
              self.assertEqual(
                  tokenize('>='),
                  (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                      match='>=',
                      index=0,
                      line=1,
@@ -224,7 +252,7 @@ if __name__ == '__main__':
              self.assertEqual(
                  tokenize('<='),
                  (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                      match='<=',
                      index=0,
                      line=1,
@@ -235,7 +263,7 @@ if __name__ == '__main__':
              self.assertEqual(
                  tokenize('>'),
                  (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                      match='>',
                      index=0,
                      line=1,
@@ -246,7 +274,7 @@ if __name__ == '__main__':
              self.assertEqual(
                  tokenize('<'),
                  (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                      match='<',
                      index=0,
                      line=1,
@@ -257,19 +285,19 @@ if __name__ == '__main__':
              self.assertEqual(
                  tokenize('!='),
                  (Token(
-                    type='equality_level_operator',
+                    type='comparison_level_operator',
                      match='!=',
                      index=0,
                      line=1,
                  ),),
              )
  
-        def test_handles_trailing_newline(self):
+        def test_tokenizes_newline(self):
              self.assertEqual(
-                tokenize('print\n'),
+                tokenize('\n'),
                  (Token(
-                    type='symbol',
-                    match='print',
+                    type='newline',
+                    match='\n',
                      index=0,
                      line=1,
                  ),),
@@ -296,6 +324,12 @@ if __name__ == '__main__':
                          index=0,
                          line=1,
                      ),
+                    Token(
+                        type='newline',
+                        match='\n',
+                        index=5,
+                        line=1,
+                    ),
                      Token(
                          type='open_parenthese',
                          match='(',