From: David Kerkeslager Date: Mon, 7 Aug 2017 04:08:09 +0000 (-0400) Subject: Add newlines to the parsing of statements X-Git-Url: https://code.kerkeslager.com/?p=fur;a=commitdiff_plain;h=730c0a3faa442985f6fdb73d91a1e6b9f72e6165 Add newlines to the parsing of statements --- diff --git a/parsing.py b/parsing.py index 5a88fdc..a7dd838 100644 --- a/parsing.py +++ b/parsing.py @@ -1,5 +1,11 @@ import collections +def consume_newlines(index, tokens): + while index < len(tokens) and tokens[index].type == 'newline': + index += 1 + + return True, index, None + def _or_parser(*parsers): def result_parser(index, tokens): failure = (False, index, None) @@ -307,7 +313,11 @@ def _assignment_statement_parser(index, tokens): return True, index, FurAssignmentStatement(target=target, expression=expression) def _statement_parser(index, tokens): - # TODO It would be good to include newlines in the parsing of this because it removes the ambiguity between "function(argument)" (one statement) and "function\n(argument)" (two statements) + _, index, _ = consume_newlines(index, tokens) + + if index == len(tokens): + return (False, index, None) + return _or_parser( _assignment_statement_parser, _expression_parser, diff --git a/tokenization.py b/tokenization.py index ff79307..3131c35 100644 --- a/tokenization.py +++ b/tokenization.py @@ -43,6 +43,7 @@ _TOKEN_MATCHERS = [ ('assignment_operator', r'='), ('addition_level_operator', r'(\+|-)'), ('multiplication_level_operator', r'(\*|//|%)'), + ('newline', r'\n'), ] _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS)) @@ -67,11 +68,13 @@ def tokenize(source): break if not success: - raise Exception('Unexpected character "{}"'.format(source[index])) + raise Exception('Unexpected character "{}" on line {}'.format( + source[index], + line, + )) - while index < len(source) and source[index] in set(['\n']): + if token.type == 'newline': line += 1 - index += 1 if __name__ == '__main__': import unittest @@ -264,12 +267,12 @@ if __name__ == '__main__': ),), ) - def test_handles_trailing_newline(self): + def test_tokenizes_newline(self): self.assertEqual( - tokenize('print\n'), + tokenize('\n'), (Token( - type='symbol', - match='print', + type='newline', + match='\n', index=0, line=1, ),), @@ -296,6 +299,12 @@ if __name__ == '__main__': index=0, line=1, ), + Token( + type='newline', + match='\n', + index=5, + line=1, + ), Token( type='open_parenthese', match='(',