X-Git-Url: https://code.kerkeslager.com/?a=blobdiff_plain;f=parsing.py;h=041f3aa8e0f893072e191328f58e457b0f33a2e4;hb=059e6ff380d17a715ffbd2d55ac59e39c931a954;hp=03a17445210cf17ca046632fc26904ebe662c451;hpb=db1651d2c0e44a380f876b452f30c2244d3a0d06;p=fur diff --git a/parsing.py b/parsing.py index 03a1744..041f3aa 100644 --- a/parsing.py +++ b/parsing.py @@ -1,5 +1,11 @@ import collections +def consume_newlines(index, tokens): + while index < len(tokens) and tokens[index].type == 'newline': + index += 1 + + return True, index, None + def _or_parser(*parsers): def result_parser(index, tokens): failure = (False, index, None) @@ -30,21 +36,53 @@ def _zero_or_more_parser(formatter, parser): return result_parser -IntegerLiteral = collections.namedtuple( - 'IntegerLiteral', +FurIntegerLiteralExpression = collections.namedtuple( + 'FurIntegerLiteralExpression', + [ + 'value', + ], +) + +FurStringLiteralExpression = collections.namedtuple( + 'FurStringLiteralExpression', [ 'value', ], ) -StringLiteral = collections.namedtuple( - 'StringLiteral', +FurSymbolExpression = collections.namedtuple( + 'FurSymbolExpression', [ 'value', ], ) -def _integer_literal_parser(index, tokens): +FurNegationExpression = collections.namedtuple( + 'FurNegationExpression', + [ + 'value', + ], +) + +# TODO We don't need to wrap this type +FurParenthesizedExpression = collections.namedtuple( + 'FurParenthesizedExpression', + [ + 'internal', + ], +) + +FurInfixExpression = collections.namedtuple( + 'FurInfixExpression', + [ + 'order', + 'operator', + 'left', + 'right', + ], +) + +def _integer_literal_expression_parser(index, tokens): failure = (False, index, None) if tokens[index].type != 'integer_literal': @@ -52,28 +90,184 @@ def _integer_literal_parser(index, tokens): value = int(tokens[index].match) index += 1 - return True, index, IntegerLiteral(value=value) + return True, index, FurIntegerLiteralExpression(value=value) + +def _string_literal_expression_parser(index, tokens): + if tokens[index].type == 'single_quoted_string_literal': + return (True, index + 1, FurStringLiteralExpression(value=tokens[index].match[1:-1])) + + return (False, index, None) -def _string_literal_parser(index, tokens): +def _symbol_expression_parser(index, tokens): + if tokens[index].type == 'symbol': + return (True, index + 1, FurSymbolExpression(value=tokens[index].match)) + + return (False, index, None) + +def _parenthesized_expression_parser(index, tokens): failure = (False, index, None) - if tokens[index].type != 'single_quoted_string_literal': + if tokens[index].type == 'open_parenthese': + index += 1 + else: return failure - value = tokens[index].match[1:-1] - index += 1 - return True, index, StringLiteral(value=value) + success, index, internal = _expression_parser(index, tokens) + if not success: + return failure + + if tokens[index].type == 'close_parenthese': + index += 1 + else: + raise Exception('Expected ")" on line {}, found "{}"'.format( + tokens[index].line, + tokens[index].match, + )) + + return True, index, FurParenthesizedExpression(internal=internal) + +def _negation_expression_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].match != '-': + return failure + + success, index, value = _literal_level_expression_parser(index + 1, tokens) + + if not success: + return failure + + return (True, index, FurNegationExpression(value=value)) + +def _literal_level_expression_parser(index, tokens): + return _or_parser( + _negation_expression_parser, + _function_call_expression_parser, + _parenthesized_expression_parser, + _integer_literal_expression_parser, + _string_literal_expression_parser, + _symbol_expression_parser, + )(index, tokens) + +def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser, order): + def result_parser(index, tokens): + failure = (False, index, None) + + success, index, result = operand_parser(index, tokens) + + if not success: + return failure + + while success and index < len(tokens) and operator_token_matcher(tokens[index]): + success = False + + if index + 1 < len(tokens): + success, try_index, value = operand_parser(index + 1, tokens) + + if success: + result = FurInfixExpression( + order=order, + operator=tokens[index].match, + left=result, + right=value, + ) + index = try_index + + return True, index, result + + return result_parser + +def _multiplication_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'multiplication_level_operator', + _literal_level_expression_parser, + 'multiplication_level', + )(index, tokens) + +def _addition_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'addition_level_operator', + _multiplication_level_expression_parser, + 'addition_level', + )(index, tokens) + +def _comparison_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'comparison_level_operator', + _addition_level_expression_parser, + 'comparison_level', + )(index, tokens) + +def _and_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'symbol' and token.match == 'and', + _comparison_level_expression_parser, + 'and_level', + )(index, tokens) + +def _or_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'symbol' and token.match == 'or', + _and_level_expression_parser, + 'or_level', + )(index, tokens) + +def _comma_separated_list_parser(index, tokens): + start_index = index + + expressions = [] + + success, index, expression = _expression_parser(index, tokens) + + if success: + expressions.append(expression) + else: + return (True, start_index, ()) + + while success and index < len(tokens) and tokens[index].type == 'comma': + success = False + + if index + 1 < len(tokens): + success, try_index, expression = _expression_parser(index + 1, tokens) + + if success: + expressions.append(expression) + index = try_index -_argument_parser = _or_parser(_integer_literal_parser, _string_literal_parser) + return True, index, tuple(expressions) -FunctionCall = collections.namedtuple( - 'FunctionCall', + +FurFunctionCallExpression = collections.namedtuple( + 'FurFunctionCallExpression', [ - 'name', + 'function', 'arguments', ], ) +FurExpressionStatement = collections.namedtuple( + 'FurExpressionStatement', + [ + 'expression', + ], +) + +FurAssignmentStatement = collections.namedtuple( + 'FurAssignmentStatement', + [ + 'target', + 'expression', + ], +) + +FurFunctionDefinitionStatement = collections.namedtuple( + 'FurFunctionDefinitionStatement', + [ + 'name', + 'statement_list', + ], +) + FurProgram = collections.namedtuple( 'FurProgram', [ @@ -81,33 +275,134 @@ FurProgram = collections.namedtuple( ], ) -def _function_call_parser(index, tokens): +def _function_call_expression_parser(index, tokens): + # TODO Use a FurSymbolExpression for the name failure = (False, index, None) - if tokens[index].type != 'symbol': + success, index, function = _symbol_expression_parser(index, tokens) + + if not success: return failure - name = tokens[index].match - index += 1 if tokens[index].type != 'open_parenthese': return failure index += 1 - success, index, argument = _argument_parser(index, tokens) + success, index, arguments = _comma_separated_list_parser(index, tokens) if not success: return failure if tokens[index].type != 'close_parenthese': + raise Exception('Expected ")", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + index += 1 + + return True, index, FurFunctionCallExpression(function=function, arguments=arguments) + +_expression_parser = _or_level_expression_parser + +def _expression_statement_parser(index, tokens): + failure = (False, index, None) + + success, index, expression = _expression_parser(index, tokens) + + if not success: return failure + + return (True, index, FurExpressionStatement(expression=expression)) + +def _assignment_statement_parser(index, tokens): + # TODO Use a FurSymbolExpression for the target? Maybe this is actually not a good idea + failure = (False, index, None) + + if tokens[index].type != 'symbol': + return failure + target = tokens[index].match index += 1 - - return True, index, FunctionCall(name=name, arguments=(argument,)) + + if tokens[index].type != 'assignment_operator': + return failure + assignment_operator_index = index + + success, index, expression = _expression_parser(index + 1, tokens) + + if not success: + raise Exception( + 'Expected expression after assignment operator on line {}'.format( + tokens[assignment_operator_index].line + ) + ) + + return True, index, FurAssignmentStatement(target=target, expression=expression) + +def _function_definition_statement_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].type == 'keyword' and tokens[index].match == 'def': + index += 1 + else: + return failure + + if tokens[index].type == 'symbol': + name = tokens[index].match + index += 1 + else: + raise Exception('Expected function name, found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'open_parenthese': + index += 1 + else: + raise Exception('Expected "(", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'close_parenthese': + index += 1 + else: + raise Exception('Expected ")", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'symbol' and tokens[index].match == 'do': + index += 1 + else: + return failure + + success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens) + + _, index, _ = consume_newlines(index, tokens) + + if tokens[index].type == 'keyword' and tokens[index].match == 'end': + index += 1 + else: + return failure + + return True, index, FurFunctionDefinitionStatement(name=name, statement_list=statement_list) + +def _statement_parser(index, tokens): + _, index, _ = consume_newlines(index, tokens) + + if index == len(tokens): + return (False, index, None) + + return _or_parser( + _assignment_statement_parser, + _expression_statement_parser, + _function_definition_statement_parser, + )(index, tokens) def _program_formatter(statement_list): return FurProgram(statement_list=statement_list) -_program_parser = _zero_or_more_parser(_program_formatter, _function_call_parser) +_program_parser = _zero_or_more_parser(_program_formatter, _statement_parser) def _parse(parser, tokens): success, index, result = parser(0, tokens) @@ -120,7 +415,6 @@ def _parse(parser, tokens): raise Exception('Unable to parse') - def parse(tokens): return _parse(_program_parser, tokens) @@ -129,27 +423,27 @@ if __name__ == '__main__': import tokenization - class StringLiteralParserTests(unittest.TestCase): + class FurStringLiteralExpressionParserTests(unittest.TestCase): def test_parses_single_quoted_string_literal(self): self.assertEqual( - _string_literal_parser(0, tokenization.tokenize("'Hello, world'")), + _string_literal_expression_parser(0, tokenization.tokenize("'Hello, world'")), ( True, 1, - StringLiteral(value='Hello, world'), + FurStringLiteralExpression(value='Hello, world'), ), ) - class FunctionCallParserTests(unittest.TestCase): + class FurFunctionCallExpressionParserTests(unittest.TestCase): def test_parses_function_with_string_literal_argument(self): self.assertEqual( - _function_call_parser(0, tokenization.tokenize("print('Hello, world')")), + _function_call_expression_parser(0, tokenization.tokenize("print('Hello, world')")), ( True, 4, - FunctionCall( + FurFunctionCallExpression( name='print', - arguments=(StringLiteral(value='Hello, world'),), + arguments=(FurStringLiteralExpression(value='Hello, world'),), ), ), )