Add newlines to the parsing of statements
author David Kerkeslager <kerkeslager@gmail.com>
Mon, 7 Aug 2017 04:08:09 +0000 (00:08 -0400)
committer David Kerkeslager <kerkeslager@gmail.com>
Mon, 7 Aug 2017 04:08:09 +0000 (00:08 -0400)
parsing.py
tokenization.py

index 5a88fdc..a7dd838 100644 (file)
@@ -1,5 +1,11 @@
 import collections
 
+def consume_newlines(index, tokens):
+    while index < len(tokens) and tokens[index].type == 'newline':
+        index += 1
+
+    return True, index, None
+
 def _or_parser(*parsers):
     def result_parser(index, tokens):
         failure = (False, index, None)
@@ -307,7 +313,11 @@ def _assignment_statement_parser(index, tokens):
     return True, index, FurAssignmentStatement(target=target, expression=expression)
 
 def _statement_parser(index, tokens):
-    # TODO It would be good to include newlines in the parsing of this because it removes the ambiguity between "function(argument)" (one statement) and "function\n(argument)" (two statements)
+    _, index, _ = consume_newlines(index, tokens)
+
+    if index == len(tokens):
+        return (False, index, None)
+
     return _or_parser(
         _assignment_statement_parser,
         _expression_parser,
index ff79307..3131c35 100644 (file)
@@ -43,6 +43,7 @@ _TOKEN_MATCHERS = [
     ('assignment_operator',             r'='),
     ('addition_level_operator',         r'(\+|-)'),
     ('multiplication_level_operator',   r'(\*|//|%)'),
+    ('newline',                         r'\n'),
 ]
 
 _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
@@ -67,11 +68,13 @@ def tokenize(source):
                 break
 
         if not success:
-            raise Exception('Unexpected character "{}"'.format(source[index]))
+            raise Exception('Unexpected character "{}" on line {}'.format(
+                source[index],
+                line,
+            ))
 
-        while index < len(source) and source[index] in set(['\n']):
+        if token.type == 'newline':
             line += 1
-            index += 1
 
 if __name__ == '__main__':
     import unittest
@@ -264,12 +267,12 @@ if __name__ == '__main__':
                 ),),
             )
 
-        def test_handles_trailing_newline(self):
+        def test_tokenizes_newline(self):
             self.assertEqual(
-                tokenize('print\n'),
+                tokenize('\n'),
                 (Token(
-                    type='symbol',
-                    match='print',
+                    type='newline',
+                    match='\n',
                     index=0,
                     line=1,
                 ),),
@@ -296,6 +299,12 @@ if __name__ == '__main__':
                         index=0,
                         line=1,
                     ),
+                    Token(
+                        type='newline',
+                        match='\n',
+                        index=5,
+                        line=1,
+                    ),
                     Token(
                         type='open_parenthese',
                         match='(',