X-Git-Url: https://code.kerkeslager.com/?a=blobdiff_plain;f=parsing.py;h=041f3aa8e0f893072e191328f58e457b0f33a2e4;hb=c9f47106ccf806533783a551b5c51d9280869635;hp=1ab6df4e5534aec97f68a454b3afbccabc133cfd;hpb=f60d1b48bbf73c51d214c5ae5c22ea3cdee087c1;p=fur diff --git a/parsing.py b/parsing.py index 1ab6df4..041f3aa 100644 --- a/parsing.py +++ b/parsing.py @@ -1,5 +1,11 @@ import collections +def consume_newlines(index, tokens): + while index < len(tokens) and tokens[index].type == 'newline': + index += 1 + + return True, index, None + def _or_parser(*parsers): def result_parser(index, tokens): failure = (False, index, None) @@ -44,41 +50,33 @@ FurStringLiteralExpression = collections.namedtuple( ], ) -FurAdditionExpression = collections.namedtuple( - 'FurAdditionExpression', +FurSymbolExpression = collections.namedtuple( + 'FurSymbolExpression', [ - 'left', - 'right', - ], -) - -FurSubtractionExpression = collections.namedtuple( - 'FurSubtractionExpression', - [ - 'left', - 'right', + 'value', ], ) -FurMultiplicationExpression = collections.namedtuple( - 'FurMultiplicationExpression', +FurNegationExpression = collections.namedtuple( + 'FurNegationExpression', [ - 'left', - 'right', + 'value', ], ) -FurIntegerDivisionExpression = collections.namedtuple( - 'FurIntegerDivisionExpression', +# TODO We don't need to wrap this type +FurParenthesizedExpression = collections.namedtuple( + 'FurParenthesizedExpression', [ - 'left', - 'right', + 'internal', ], ) -FurModularDivisionExpression = collections.namedtuple( - 'FurModularDivisionExpression', +FurInfixExpression = collections.namedtuple( + 'FurInfixExpression', [ + 'order', + 'operator', 'left', 'right', ], @@ -95,86 +93,142 @@ def _integer_literal_expression_parser(index, tokens): return True, index, FurIntegerLiteralExpression(value=value) def _string_literal_expression_parser(index, tokens): + if tokens[index].type == 'single_quoted_string_literal': + return (True, index + 1, FurStringLiteralExpression(value=tokens[index].match[1:-1])) + + return (False, index, None) + +def _symbol_expression_parser(index, tokens): + if tokens[index].type == 'symbol': + return (True, index + 1, FurSymbolExpression(value=tokens[index].match)) + + return (False, index, None) + +def _parenthesized_expression_parser(index, tokens): failure = (False, index, None) - if tokens[index].type != 'single_quoted_string_literal': + if tokens[index].type == 'open_parenthese': + index += 1 + else: return failure - value = tokens[index].match[1:-1] - index += 1 - return True, index, FurStringLiteralExpression(value=value) + success, index, internal = _expression_parser(index, tokens) + if not success: + return failure + + if tokens[index].type == 'close_parenthese': + index += 1 + else: + raise Exception('Expected ")" on line {}, found "{}"'.format( + tokens[index].line, + tokens[index].match, + )) + + return True, index, FurParenthesizedExpression(internal=internal) + +def _negation_expression_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].match != '-': + return failure + + success, index, value = _literal_level_expression_parser(index + 1, tokens) + + if not success: + return failure + + return (True, index, FurNegationExpression(value=value)) def _literal_level_expression_parser(index, tokens): return _or_parser( + _negation_expression_parser, _function_call_expression_parser, + _parenthesized_expression_parser, _integer_literal_expression_parser, _string_literal_expression_parser, + _symbol_expression_parser, )(index, tokens) -def _multiplication_level_expression_parser(index, tokens): - failure = (False, index, None) - - success, index, result = _literal_level_expression_parser(index, tokens) +def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser, order): + def result_parser(index, tokens): + failure = (False, index, None) - if not success: - return failure + success, index, result = operand_parser(index, tokens) - while success and index < len(tokens) and tokens[index].type == 'multiplication_level_operator': - success = False + if not success: + return failure - if index + 1 < len(tokens): - success, try_index, value = _literal_level_expression_parser(index + 1, tokens) + while success and index < len(tokens) and operator_token_matcher(tokens[index]): + success = False - if success: - result = { - '*': FurMultiplicationExpression, - '//': FurIntegerDivisionExpression, - '%': FurModularDivisionExpression, - }[tokens[index].match](left=result, right=value) - index = try_index + if index + 1 < len(tokens): + success, try_index, value = operand_parser(index + 1, tokens) - return True, index, result + if success: + result = FurInfixExpression( + order=order, + operator=tokens[index].match, + left=result, + right=value, + ) + index = try_index -def _addition_level_expression_parser(index, tokens): - failure = (False, index, None) + return True, index, result - success, index, result = _multiplication_level_expression_parser(index, tokens) + return result_parser - if not success: - return failure +def _multiplication_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'multiplication_level_operator', + _literal_level_expression_parser, + 'multiplication_level', + )(index, tokens) - while success and index < len(tokens) and tokens[index].type == 'addition_level_operator': - success = False +def _addition_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'addition_level_operator', + _multiplication_level_expression_parser, + 'addition_level', + )(index, tokens) - if index + 1 < len(tokens): - success, try_index, value = _multiplication_level_expression_parser(index + 1, tokens) +def _comparison_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'comparison_level_operator', + _addition_level_expression_parser, + 'comparison_level', + )(index, tokens) - if success: - result = { - '+': FurAdditionExpression, - '-': FurSubtractionExpression, - }[tokens[index].match](left=result, right=value) - index = try_index +def _and_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'symbol' and token.match == 'and', + _comparison_level_expression_parser, + 'and_level', + )(index, tokens) - return True, index, result +def _or_level_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'symbol' and token.match == 'or', + _and_level_expression_parser, + 'or_level', + )(index, tokens) def _comma_separated_list_parser(index, tokens): - failure = (False, index, None) + start_index = index expressions = [] - success, index, expression = _addition_level_expression_parser(index, tokens) + success, index, expression = _expression_parser(index, tokens) if success: expressions.append(expression) else: - return failure + return (True, start_index, ()) while success and index < len(tokens) and tokens[index].type == 'comma': success = False if index + 1 < len(tokens): - success, try_index, expression = _addition_level_expression_parser(index + 1, tokens) + success, try_index, expression = _expression_parser(index + 1, tokens) if success: expressions.append(expression) @@ -186,11 +240,34 @@ def _comma_separated_list_parser(index, tokens): FurFunctionCallExpression = collections.namedtuple( 'FurFunctionCallExpression', [ - 'name', + 'function', 'arguments', ], ) +FurExpressionStatement = collections.namedtuple( + 'FurExpressionStatement', + [ + 'expression', + ], +) + +FurAssignmentStatement = collections.namedtuple( + 'FurAssignmentStatement', + [ + 'target', + 'expression', + ], +) + +FurFunctionDefinitionStatement = collections.namedtuple( + 'FurFunctionDefinitionStatement', + [ + 'name', + 'statement_list', + ], +) + FurProgram = collections.namedtuple( 'FurProgram', [ @@ -199,12 +276,13 @@ FurProgram = collections.namedtuple( ) def _function_call_expression_parser(index, tokens): + # TODO Use a FurSymbolExpression for the name failure = (False, index, None) - if tokens[index].type != 'symbol': + success, index, function = _symbol_expression_parser(index, tokens) + + if not success: return failure - name = tokens[index].match - index += 1 if tokens[index].type != 'open_parenthese': return failure @@ -222,12 +300,109 @@ def _function_call_expression_parser(index, tokens): )) index += 1 - return True, index, FurFunctionCallExpression(name=name, arguments=arguments) + return True, index, FurFunctionCallExpression(function=function, arguments=arguments) + +_expression_parser = _or_level_expression_parser + +def _expression_statement_parser(index, tokens): + failure = (False, index, None) + + success, index, expression = _expression_parser(index, tokens) + + if not success: + return failure + + return (True, index, FurExpressionStatement(expression=expression)) + +def _assignment_statement_parser(index, tokens): + # TODO Use a FurSymbolExpression for the target? Maybe this is actually not a good idea + failure = (False, index, None) + + if tokens[index].type != 'symbol': + return failure + target = tokens[index].match + index += 1 + + if tokens[index].type != 'assignment_operator': + return failure + assignment_operator_index = index + + success, index, expression = _expression_parser(index + 1, tokens) + + if not success: + raise Exception( + 'Expected expression after assignment operator on line {}'.format( + tokens[assignment_operator_index].line + ) + ) + + return True, index, FurAssignmentStatement(target=target, expression=expression) + +def _function_definition_statement_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].type == 'keyword' and tokens[index].match == 'def': + index += 1 + else: + return failure + + if tokens[index].type == 'symbol': + name = tokens[index].match + index += 1 + else: + raise Exception('Expected function name, found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'open_parenthese': + index += 1 + else: + raise Exception('Expected "(", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'close_parenthese': + index += 1 + else: + raise Exception('Expected ")", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) + + if tokens[index].type == 'symbol' and tokens[index].match == 'do': + index += 1 + else: + return failure + + success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens) + + _, index, _ = consume_newlines(index, tokens) + + if tokens[index].type == 'keyword' and tokens[index].match == 'end': + index += 1 + else: + return failure + + return True, index, FurFunctionDefinitionStatement(name=name, statement_list=statement_list) + +def _statement_parser(index, tokens): + _, index, _ = consume_newlines(index, tokens) + + if index == len(tokens): + return (False, index, None) + + return _or_parser( + _assignment_statement_parser, + _expression_statement_parser, + _function_definition_statement_parser, + )(index, tokens) def _program_formatter(statement_list): return FurProgram(statement_list=statement_list) -_program_parser = _zero_or_more_parser(_program_formatter, _function_call_expression_parser) +_program_parser = _zero_or_more_parser(_program_formatter, _statement_parser) def _parse(parser, tokens): success, index, result = parser(0, tokens)