X-Git-Url: https://code.kerkeslager.com/?p=fur;a=blobdiff_plain;f=parsing.py;h=b2a14a888b820346c0f0086beb52e4b58c38e4a0;hp=5a88fdcc472c5c77265d617342fec8b5a925ddea;hb=3da330f045ed7fcb66ee9d9447de320680263699;hpb=eab88b322191c40168553b8671b968d1b1558084 diff --git a/parsing.py b/parsing.py index 5a88fdc..b2a14a8 100644 --- a/parsing.py +++ b/parsing.py @@ -1,5 +1,11 @@ import collections +def consume_newlines(index, tokens): + while index < len(tokens) and tokens[index].type == 'newline': + index += 1 + + return True, index, None + def _or_parser(*parsers): def result_parser(index, tokens): failure = (False, index, None) @@ -33,21 +39,21 @@ def _zero_or_more_parser(formatter, parser): FurIntegerLiteralExpression = collections.namedtuple( 'FurIntegerLiteralExpression', [ - 'value', + 'integer', ], ) FurStringLiteralExpression = collections.namedtuple( 'FurStringLiteralExpression', [ - 'value', + 'string', ], ) FurSymbolExpression = collections.namedtuple( 'FurSymbolExpression', [ - 'value', + 'symbol', ], ) @@ -58,13 +64,6 @@ FurNegationExpression = collections.namedtuple( ], ) -FurParenthesizedExpression = collections.namedtuple( - 'FurParenthesizedExpression', - [ - 'internal', - ], -) - FurInfixExpression = collections.namedtuple( 'FurInfixExpression', [ @@ -83,41 +82,47 @@ def _integer_literal_expression_parser(index, tokens): value = int(tokens[index].match) index += 1 - return True, index, FurIntegerLiteralExpression(value=value) + return True, index, FurIntegerLiteralExpression(integer=value) def _string_literal_expression_parser(index, tokens): if tokens[index].type == 'single_quoted_string_literal': - return (True, index + 1, FurStringLiteralExpression(value=tokens[index].match[1:-1])) + return (True, index + 1, FurStringLiteralExpression(string=tokens[index].match[1:-1])) return (False, index, None) def _symbol_expression_parser(index, tokens): if tokens[index].type == 'symbol': - return (True, index + 1, FurSymbolExpression(value=tokens[index].match)) + return (True, index + 1, FurSymbolExpression(symbol=tokens[index].match)) return (False, index, None) -def _parenthesized_expression_parser(index, tokens): - failure = (False, index, None) +def _parenthese_wrapped_parser(internal_parser): + def result_parser(index, tokens): + failure = (False, index, None) - if tokens[index].type == 'open_parenthese': - index += 1 - else: - return failure + if tokens[index].type == 'open_parenthese': + index += 1 + else: + return failure - success, index, internal = _expression_parser(index, tokens) - if not success: - return failure + success, index, internal = internal_parser(index, tokens) + if not success: + return failure - if tokens[index].type == 'close_parenthese': - index += 1 - else: - raise Exception('Expected ")" on line {}, found "{}"'.format( - tokens[index].line, - tokens[index].match, - )) + if tokens[index].type == 'close_parenthese': + index += 1 + else: + raise Exception('Expected ")" on line {}, found "{}"'.format( + tokens[index].line, + tokens[index].match, + )) - return True, index, FurParenthesizedExpression(internal=internal) + return True, index, internal + + return result_parser + +def _parenthesized_expression_parser(index, tokens): + return _parenthese_wrapped_parser(_expression_parser)(index, tokens) def _negation_expression_parser(index, tokens): failure = (False, index, None) @@ -184,17 +189,17 @@ def _addition_level_expression_parser(index, tokens): 'addition_level', )(index, tokens) -def _equality_level_expression_parser(index, tokens): +def _comparison_level_expression_parser(index, tokens): return _left_recursive_infix_operator_parser( - lambda token: token.type == 'equality_level_operator', + lambda token: token.type == 'comparison_level_operator', _addition_level_expression_parser, - 'equality_level', + 'comparison_level', )(index, tokens) def _and_level_expression_parser(index, tokens): return _left_recursive_infix_operator_parser( lambda token: token.type == 'symbol' and token.match == 'and', - _equality_level_expression_parser, + _comparison_level_expression_parser, 'and_level', )(index, tokens) @@ -205,30 +210,35 @@ def _or_level_expression_parser(index, tokens): 'or_level', )(index, tokens) -def _comma_separated_list_parser(index, tokens): - failure = (False, index, None) +def _comma_separated_list_parser(subparser): + def result_parser(index, tokens): + start_index = index - expressions = [] + items = [] - success, index, expression = _expression_parser(index, tokens) + success, index, item = subparser(index, tokens) - if success: - expressions.append(expression) - else: - return failure + if success: + items.append(item) + else: + return (True, start_index, ()) + + while success and index < len(tokens) and tokens[index].type == 'comma': + success = False - while success and index < len(tokens) and tokens[index].type == 'comma': - success = False + if index + 1 < len(tokens): + success, try_index, item = subparser(index + 1, tokens) - if index + 1 < len(tokens): - success, try_index, expression = _expression_parser(index + 1, tokens) + if success: + items.append(item) + index = try_index - if success: - expressions.append(expression) - index = try_index + return True, index, tuple(items) - return True, index, tuple(expressions) + return result_parser +def _comma_separated_expression_list_parser(index, tokens): + return _comma_separated_list_parser(_expression_parser)(index, tokens) FurFunctionCallExpression = collections.namedtuple( 'FurFunctionCallExpression', @@ -238,6 +248,13 @@ FurFunctionCallExpression = collections.namedtuple( ], ) +FurExpressionStatement = collections.namedtuple( + 'FurExpressionStatement', + [ + 'expression', + ], +) + FurAssignmentStatement = collections.namedtuple( 'FurAssignmentStatement', [ @@ -246,6 +263,15 @@ FurAssignmentStatement = collections.namedtuple( ], ) +FurFunctionDefinitionStatement = collections.namedtuple( + 'FurFunctionDefinitionStatement', + [ + 'name', + 'argument_name_list', + 'statement_list', + ], +) + FurProgram = collections.namedtuple( 'FurProgram', [ @@ -254,46 +280,75 @@ FurProgram = collections.namedtuple( ) def _function_call_expression_parser(index, tokens): - # TODO Use a FurSymbolExpression for the name failure = (False, index, None) - success, index, function = _symbol_expression_parser(index, tokens) + # We have to be careful what expressions we add here. Otherwise expressions + # like "a + b()" become ambiguous to the parser. + success, index, function = _or_parser( + _symbol_expression_parser, + _parenthesized_expression_parser, + )(index, tokens) if not success: return failure - if tokens[index].type != 'open_parenthese': - return failure - index += 1 - - success, index, arguments = _comma_separated_list_parser(index, tokens) + success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)( + index, + tokens, + ) if not success: return failure - if tokens[index].type != 'close_parenthese': - raise Exception('Expected ")", found "{}" on line {}'.format( - tokens[index].match, - tokens[index].line, - )) - index += 1 + while success and index < len(tokens): + # "function" is actually the full function call if the next parse attempt doesn't succeed + # We can't give this a better name without a bunch of checks, however. + function = FurFunctionCallExpression( + function=function, + arguments=arguments, + ) - return True, index, FurFunctionCallExpression(function=function, arguments=arguments) + success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)( + index, + tokens, + ) + + return True, index, function _expression_parser = _or_level_expression_parser +def _expression_statement_parser(index, tokens): + failure = (False, index, None) + + success, index, expression = _expression_parser(index, tokens) + + if not success: + return failure + + return (True, index, FurExpressionStatement(expression=expression)) + +BUILTINS = {'print', 'pow'} + def _assignment_statement_parser(index, tokens): - # TODO Use a FurSymbolExpression for the target? Maybe this is actually not a good idea failure = (False, index, None) - if tokens[index].type != 'symbol': + if tokens[index].type == 'symbol': + target = tokens[index].match + target_assignment_line = tokens[index].line + + index += 1 + else: return failure - target = tokens[index].match - index += 1 - if tokens[index].type != 'assignment_operator': + + if tokens[index].type == 'assignment_operator': + if target in BUILTINS: + raise Exception( + 'Trying to assign to builtin "{}" on line {}'.format(target, target_assignment_line), + ) + assignment_operator_index = index + else: return failure - assignment_operator_index = index success, index, expression = _expression_parser(index + 1, tokens) @@ -306,11 +361,74 @@ def _assignment_statement_parser(index, tokens): return True, index, FurAssignmentStatement(target=target, expression=expression) +def _function_definition_statement_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].type == 'keyword' and tokens[index].match == 'def': + index += 1 + else: + return failure + + if tokens[index].type == 'symbol': + name = tokens[index].match + index += 1 + else: + raise Exception('Expected function name, found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'open_parenthese': + index += 1 + else: + raise Exception('Expected "(", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + success, index, argument_name_list = _comma_separated_list_parser(_symbol_expression_parser)( + index, + tokens, + ) + + if tokens[index].type == 'close_parenthese': + index += 1 + else: + raise Exception('Expected ")", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'symbol' and tokens[index].match == 'do': + index += 1 + else: + return failure + + success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens) + + _, index, _ = consume_newlines(index, tokens) + + if tokens[index].type == 'keyword' and tokens[index].match == 'end': + index += 1 + else: + return failure + + return True, index, FurFunctionDefinitionStatement( + name=name, + argument_name_list=tuple(an.symbol for an in argument_name_list), + statement_list=statement_list, + ) + def _statement_parser(index, tokens): - # TODO It would be good to include newlines in the parsing of this because it removes the ambiguity between "function(argument)" (one statement) and "function\n(argument)" (two statements) + _, index, _ = consume_newlines(index, tokens) + + if index == len(tokens): + return (False, index, None) + return _or_parser( _assignment_statement_parser, - _expression_parser, + _expression_statement_parser, + _function_definition_statement_parser, )(index, tokens) def _program_formatter(statement_list): @@ -344,7 +462,7 @@ if __name__ == '__main__': ( True, 1, - FurStringLiteralExpression(value='Hello, world'), + FurStringLiteralExpression(string='Hello, world'), ), ) @@ -357,7 +475,7 @@ if __name__ == '__main__': 4, FurFunctionCallExpression( name='print', - arguments=(FurStringLiteralExpression(value='Hello, world'),), + arguments=(FurStringLiteralExpression(string='Hello, world'),), ), ), )