X-Git-Url: https://code.kerkeslager.com/?p=fur;a=blobdiff_plain;f=parsing.py;h=20ac3761252a577523034493d347383c4b9fcdd4;hp=42f0ec82a0e890a5ec876b200d7037aac6c31f25;hb=HEAD;hpb=b059a00e0e2511a2ca2477db1e91d53fc5d2ec0f diff --git a/parsing.py b/parsing.py index 42f0ec8..20ac376 100644 --- a/parsing.py +++ b/parsing.py @@ -39,27 +39,37 @@ def _zero_or_more_parser(formatter, parser): FurIntegerLiteralExpression = collections.namedtuple( 'FurIntegerLiteralExpression', [ - 'value', + 'integer', ], ) +FurLambdaExpression = collections.namedtuple( + 'FurLambdaExpression', + ( + 'argument_name_list', + 'statement_list', + ), +) + FurStringLiteralExpression = collections.namedtuple( 'FurStringLiteralExpression', [ - 'value', + 'string', ], ) FurSymbolExpression = collections.namedtuple( 'FurSymbolExpression', [ - 'value', + 'metadata', + 'symbol', ], ) FurNegationExpression = collections.namedtuple( 'FurNegationExpression', [ + 'metadata', 'value', ], ) @@ -67,6 +77,7 @@ FurNegationExpression = collections.namedtuple( FurInfixExpression = collections.namedtuple( 'FurInfixExpression', [ + 'metadata', 'order', 'operator', 'left', @@ -74,6 +85,37 @@ FurInfixExpression = collections.namedtuple( ], ) +FurListLiteralExpression = collections.namedtuple( + 'FurListLiteralExpression', + [ + 'item_expression_list', + ], +) + +FurIfExpression = collections.namedtuple( + 'FurIfExpression', + [ + 'condition_expression', + 'if_statement_list', + 'else_statement_list', + ], +) + +FurSymbolExpressionPair = collections.namedtuple( + 'FurSymbolExpressionPair', + [ + 'symbol', + 'expression', + ], +) + +FurStructureLiteralExpression = collections.namedtuple( + 'FurStructureLiteralExpression', + [ + 'fields', + ], +) + def _integer_literal_expression_parser(index, tokens): failure = (False, index, None) @@ -82,63 +124,204 @@ def _integer_literal_expression_parser(index, tokens): value = int(tokens[index].match) index += 1 - return True, index, FurIntegerLiteralExpression(value=value) + return True, index, FurIntegerLiteralExpression(integer=value) def _string_literal_expression_parser(index, tokens): + if tokens[index].type == 'double_quoted_string_literal': + return (True, index + 1, FurStringLiteralExpression(string=tokens[index].match[1:-1])) + if tokens[index].type == 'single_quoted_string_literal': - return (True, index + 1, FurStringLiteralExpression(value=tokens[index].match[1:-1])) + return (True, index + 1, FurStringLiteralExpression(string=tokens[index].match[1:-1])) return (False, index, None) def _symbol_expression_parser(index, tokens): if tokens[index].type == 'symbol': - return (True, index + 1, FurSymbolExpression(value=tokens[index].match)) + return ( + True, + index + 1, + FurSymbolExpression( + metadata=tokens[index].metadata, + symbol=tokens[index].match, + ), + ) return (False, index, None) +def _wrapped_parser(open_token, close_token, internal_parser): + def result_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].type == open_token: + index += 1 + else: + return failure + + success, index, internal = internal_parser(index, tokens) + if not success: + return failure + + if tokens[index].type == close_token: + index += 1 + else: + # TODO Put the actual expected character in the error message + raise Exception('Expected closing token on line {}, found "{}"'.format( + tokens[index].line, + tokens[index].match, + )) + + return True, index, internal + + return result_parser + +def _bracket_wrapped_parser(internal_parser): + return _wrapped_parser('open_bracket', 'close_bracket', internal_parser) + +def _parenthese_wrapped_parser(internal_parser): + return _wrapped_parser('open_parenthese', 'close_parenthese', internal_parser) + def _parenthesized_expression_parser(index, tokens): + return _parenthese_wrapped_parser(_expression_parser)(index, tokens) + +def symbol_expression_pair_parser(index, tokens): failure = (False, index, None) - if tokens[index].type == 'open_parenthese': + if tokens[index].type == 'symbol': + symbol = tokens[index].match index += 1 else: return failure - success, index, internal = _expression_parser(index, tokens) + if tokens[index].type == 'colon': + index += 1 + else: + return failure + + success, index, expression = _expression_parser(index, tokens) + if not success: + raise Exception() + + return ( + True, + index, + FurSymbolExpressionPair( + symbol=symbol, + expression=expression, + ), + ) + +def _structure_literal_parser(index, tokens): + success, index, result = _parenthese_wrapped_parser(_comma_separated_list_parser(symbol_expression_pair_parser))(index, tokens) + return ( + success, + index, + FurStructureLiteralExpression( + fields=result, + ), + ) + +def _lambda_expression_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].type == 'keyword' and tokens[index].match == 'lambda': + index += 1 + else: return failure - if tokens[index].type == 'close_parenthese': + if tokens[index].type == 'open_parenthese': index += 1 else: - raise Exception('Expected ")" on line {}, found "{}"'.format( - tokens[index].line, + raise Exception('Expected "(", found "{}" on line {}'.format( tokens[index].match, + tokens[index].metadata.line, )) - return True, index, internal + success, index, argument_name_list = _comma_separated_list_parser(_symbol_expression_parser)( + index, + tokens, + ) -def _negation_expression_parser(index, tokens): - failure = (False, index, None) + if tokens[index].type == 'close_parenthese': + index += 1 + else: + raise Exception('Expected ")", found "{}" on line {}'.format( + tokens[index].match, + tokens[index].line, + )) - if tokens[index].match != '-': + if tokens[index].match == 'do': + index += 1 + else: return failure - success, index, value = _literal_level_expression_parser(index + 1, tokens) + success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens) - if not success: + _, index, _ = consume_newlines(index, tokens) + + if tokens[index].type == 'keyword' and tokens[index].match == 'end': + index += 1 + else: return failure - return (True, index, FurNegationExpression(value=value)) + return True, index, FurLambdaExpression( + argument_name_list=tuple(an.symbol for an in argument_name_list), + statement_list=statement_list, + ) + + + +def _list_literal_expression_parser(index, tokens): + failure = (False, index, None) + + success, index, item_expression_list = _bracket_wrapped_parser(_comma_separated_expression_list_parser)(index, tokens) + + if success: + return success, index, FurListLiteralExpression( + item_expression_list=item_expression_list, + ) + else: + return failure def _literal_level_expression_parser(index, tokens): return _or_parser( - _negation_expression_parser, + _list_item_expression_parser, _function_call_expression_parser, _parenthesized_expression_parser, _integer_literal_expression_parser, _string_literal_expression_parser, + _list_literal_expression_parser, + _lambda_expression_parser, _symbol_expression_parser, + _structure_literal_parser, + )(index, tokens) + +def _dot_expression_parser(index, tokens): + return _left_recursive_infix_operator_parser( + lambda token: token.type == 'period', + _literal_level_expression_parser, + 'dot_level', + )(index, tokens) + +def _negation_expression_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].match != '-': + return failure + + metadata = tokens[index].metadata + + success, index, value = _dot_expression_parser(index + 1, tokens) + + if not success: + return failure + + return (True, index, FurNegationExpression(metadata=metadata, value=value)) + +def _negation_level_expression_parser(index, tokens): + return _or_parser( + _dot_expression_parser, + _negation_expression_parser, )(index, tokens) def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser, order): @@ -158,6 +341,7 @@ def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser if success: result = FurInfixExpression( + metadata=tokens[index].metadata, order=order, operator=tokens[index].match, left=result, @@ -172,7 +356,7 @@ def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser def _multiplication_level_expression_parser(index, tokens): return _left_recursive_infix_operator_parser( lambda token: token.type == 'multiplication_level_operator', - _literal_level_expression_parser, + _negation_level_expression_parser, 'multiplication_level', )(index, tokens) @@ -210,6 +394,8 @@ def _comma_separated_list_parser(subparser): items = [] + _, index, _ = consume_newlines(index, tokens) + success, index, item = subparser(index, tokens) if success: @@ -218,10 +404,13 @@ def _comma_separated_list_parser(subparser): return (True, start_index, ()) while success and index < len(tokens) and tokens[index].type == 'comma': + index += 1 success = False - if index + 1 < len(tokens): - success, try_index, item = subparser(index + 1, tokens) + _, index, _ = consume_newlines(index, tokens) + + if index < len(tokens): + success, try_index, item = subparser(index, tokens) if success: items.append(item) @@ -234,9 +423,19 @@ def _comma_separated_list_parser(subparser): def _comma_separated_expression_list_parser(index, tokens): return _comma_separated_list_parser(_expression_parser)(index, tokens) +FurListItemExpression = collections.namedtuple( + 'FurListItemExpression', + [ + 'list_expression', + 'metadata', + 'index_expression', + ], +) + FurFunctionCallExpression = collections.namedtuple( 'FurFunctionCallExpression', [ + 'metadata', 'function', 'arguments', ], @@ -273,34 +472,136 @@ FurProgram = collections.namedtuple( ], ) -def _function_call_expression_parser(index, tokens): - # TODO Use a FurSymbolExpression for the name +def _list_item_expression_parser(index, tokens): failure = (False, index, None) - success, index, function = _symbol_expression_parser(index, tokens) + # We have to be careful what expressions we add here. Otherwise expressions + # like "a + b[0]" become ambiguous to the parser. + success, index, list_expression = _or_parser( + _symbol_expression_parser, + _parenthesized_expression_parser, + )(index, tokens) if not success: return failure - if tokens[index].type != 'open_parenthese': + metadata = tokens[index].metadata + + success, index, index_expression = _bracket_wrapped_parser(_expression_parser)( + index, + tokens, + ) + + if not success: + return failure + + while success and index < len(tokens): + # "list_expression" is actually the full list item expression if the next parse attempt doesn't succeed + # We can't give this a better name without a bunch of checks, however. + list_expression = FurListItemExpression( + list_expression=list_expression, + metadata=metadata, + index_expression=index_expression, + ) + + metadata = tokens[index].metadata + + success, index, index_expression = _bracket_wrapped_parser(_expression_parser)( + index, + tokens, + ) + + return True, index, list_expression + +def _function_call_expression_parser(index, tokens): + failure = (False, index, None) + + # We have to be careful what expressions we add here. Otherwise expressions + # like "a + b()" become ambiguous to the parser. + success, index, function = _or_parser( + _symbol_expression_parser, + _parenthesized_expression_parser, + )(index, tokens) + + if not success: return failure - index += 1 - success, index, arguments = _comma_separated_expression_list_parser(index, tokens) + metadata = tokens[index].metadata + + success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)( + index, + tokens, + ) if not success: return failure - if tokens[index].type != 'close_parenthese': - raise Exception('Expected ")", found "{}" on line {}'.format( - tokens[index].match, - tokens[index].line, - )) - index += 1 + while success and index < len(tokens): + # "function" is actually the full function call if the next parse attempt doesn't succeed + # We can't give this a better name without a bunch of checks, however. + function = FurFunctionCallExpression( + metadata=metadata, + function=function, + arguments=arguments, + ) + + metadata = tokens[index].metadata - return True, index, FurFunctionCallExpression(function=function, arguments=arguments) + success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)( + index, + tokens, + ) -_expression_parser = _or_level_expression_parser + return True, index, function + +def _if_expression_parser(index, tokens): + failure = (False, index, None) + + if tokens[index].match == 'if': + index += 1 + else: + return failure + + success, index, condition_expression = _or_level_expression_parser(index, tokens) + + if not success: + raise Exception('Expected condition after "if" on line {}'.format(tokens[index].line)) + + if tokens[index].match == 'do': + index += 1 + else: + raise Exception('Expected "do" after "if" on line {}'.format(tokens[index].line)) + + + success, index, if_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens) + _, index, _ = consume_newlines(index, tokens) + + if tokens[index].match == 'else': + index += 1 + success, index, else_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens) + _, index, _ = consume_newlines(index, tokens) + else: + else_statement_list = () + + if tokens[index].match == 'end': + index += 1 + else: + raise Exception('Expected "end" after "if" on line {}'.format(tokens[index].line)) + + return ( + True, + index, + FurIfExpression( + condition_expression=condition_expression, + if_statement_list=if_statement_list, + else_statement_list=else_statement_list, + ), + ) + +_expression_parser = _or_parser( + _or_level_expression_parser, + _if_expression_parser, # This should always be at the top level +) def _expression_statement_parser(index, tokens): failure = (False, index, None) @@ -312,18 +613,28 @@ def _expression_statement_parser(index, tokens): return (True, index, FurExpressionStatement(expression=expression)) +BUILTINS = {'print', 'pow'} + def _assignment_statement_parser(index, tokens): - # TODO Use a FurSymbolExpression for the target? Maybe this is actually not a good idea failure = (False, index, None) - if tokens[index].type != 'symbol': + if tokens[index].type == 'symbol': + target = tokens[index].match + target_assignment_line = tokens[index].metadata.line + + index += 1 + else: return failure - target = tokens[index].match - index += 1 - if tokens[index].type != 'assignment_operator': + + if tokens[index].type == 'assignment_operator': + if target in BUILTINS: + raise Exception( + 'Trying to assign to builtin "{}" on line {}'.format(target, target_assignment_line), + ) + assignment_operator_index = index + else: return failure - assignment_operator_index = index success, index, expression = _expression_parser(index + 1, tokens) @@ -374,7 +685,7 @@ def _function_definition_statement_parser(index, tokens): tokens[index].line, )) - if tokens[index].type == 'symbol' and tokens[index].match == 'do': + if tokens[index].match == 'do': index += 1 else: return failure @@ -390,7 +701,7 @@ def _function_definition_statement_parser(index, tokens): return True, index, FurFunctionDefinitionStatement( name=name, - argument_name_list=tuple(an.value for an in argument_name_list), + argument_name_list=tuple(an.symbol for an in argument_name_list), statement_list=statement_list, ) @@ -437,7 +748,7 @@ if __name__ == '__main__': ( True, 1, - FurStringLiteralExpression(value='Hello, world'), + FurStringLiteralExpression(string='Hello, world'), ), ) @@ -450,7 +761,7 @@ if __name__ == '__main__': 4, FurFunctionCallExpression( name='print', - arguments=(FurStringLiteralExpression(value='Hello, world'),), + arguments=(FurStringLiteralExpression(string='Hello, world'),), ), ), )