# Newline-skipping helper with the same (success, index, value) return shape
# as the parsers in this file.
# Loops while `index` is in range and the token there has type 'newline',
# then returns (True, index, None); the value slot is always None.
3 def consume_newlines(index, tokens):
4 while index < len(tokens) and tokens[index].type == 'newline':
7 return True, index, None
# Combinator over any number of parsers. The inner result_parser calls each
# parser in the order given; every parser returns a (success, index, value)
# triple.
9 def _or_parser(*parsers):
10 def result_parser(index, tokens):
# Failure triple built before any sub-parser runs, so it carries the index
# the caller passed in.
11 failure = (False, index, None)
13 for parser in parsers:
14 success, index, value = parser(index, tokens)
17 return (success, index, value)
# Combinator: the inner result_parser calls `parser` in a loop while tokens
# remain and returns (True, index, formatter(values)).
# `formatter` turns the collected values into the final result value.
23 def _zero_or_more_parser(formatter, parser):
24 def result_parser(index, tokens):
27 while index < len(tokens):
28 success, index, value = parser(index, tokens)
35 return (True, index, formatter(values))
# Expression node types produced by the parsers in this file, each declared
# with collections.namedtuple.
39 FurIntegerLiteralExpression = collections.namedtuple(
40 'FurIntegerLiteralExpression',
46 FurLambdaExpression = collections.namedtuple(
47 'FurLambdaExpression',
54 FurStringLiteralExpression = collections.namedtuple(
55 'FurStringLiteralExpression',
61 FurSymbolExpression = collections.namedtuple(
62 'FurSymbolExpression',
69 FurNegationExpression = collections.namedtuple(
70 'FurNegationExpression',
77 FurInfixExpression = collections.namedtuple(
88 FurListLiteralExpression = collections.namedtuple(
89 'FurListLiteralExpression',
91 'item_expression_list',
95 FurIfExpression = collections.namedtuple(
98 'condition_expression',
100 'else_statement_list',
104 FurSymbolExpressionPair = collections.namedtuple(
105 'FurSymbolExpressionPair',
112 FurStructureLiteralExpression = collections.namedtuple(
113 'FurStructureLiteralExpression',
# Parser for an 'integer_literal' token: converts the token's match text with
# int() and returns (True, index, FurIntegerLiteralExpression(integer=value)).
# NOTE(review): tokens[index] is read without a bounds check here.
119 def _integer_literal_expression_parser(index, tokens):
120 failure = (False, index, None)
122 if tokens[index].type != 'integer_literal':
124 value = int(tokens[index].match)
127 return True, index, FurIntegerLiteralExpression(integer=value)
def _string_literal_expression_parser(index, tokens):
    """Parse a single- or double-quoted string literal token.

    Args:
        index: Position in ``tokens`` at which to attempt the parse.
        tokens: Sequence of token objects exposing ``type`` and ``match``.

    Returns:
        ``(True, index + 1, FurStringLiteralExpression)`` when the token at
        ``index`` is a string literal, with the first and last character of
        the match (the quote delimiters) removed; ``(False, index, None)``
        otherwise, including when ``index`` is past the end of ``tokens``.
    """
    # Running off the end of the token stream is a failed parse, not a crash.
    if index >= len(tokens):
        return (False, index, None)

    token = tokens[index]

    # Both quote styles are handled identically: drop one delimiter per side.
    if token.type in ('double_quoted_string_literal', 'single_quoted_string_literal'):
        return (True, index + 1, FurStringLiteralExpression(string=token.match[1:-1]))

    return (False, index, None)
# Parser for a 'symbol' token. The success branch passes the token's metadata
# and match text as `metadata=` and `symbol=` keyword arguments; any other
# token type yields (False, index, None).
# NOTE(review): tokens[index] is read without a bounds check here.
138 def _symbol_expression_parser(index, tokens):
139 if tokens[index].type == 'symbol':
144 metadata=tokens[index].metadata,
145 symbol=tokens[index].match,
149 return (False, index, None)
# Combinator: the inner result_parser checks for a token of type `open_token`,
# runs `internal_parser`, then checks for a token of type `close_token`, and
# returns (True, index, internal) with the inner parser's value.
# Raises Exception with an 'Expected closing token ...' message (presumably
# when the close_token check fails — confirm against the full branch).
151 def _wrapped_parser(open_token, close_token, internal_parser):
152 def result_parser(index, tokens):
153 failure = (False, index, None)
155 if tokens[index].type == open_token:
160 success, index, internal = internal_parser(index, tokens)
164 if tokens[index].type == close_token:
167 # TODO Put the actual expected character in the error message
168 raise Exception('Expected closing token on line {}, found "{}"'.format(
173 return True, index, internal
def _bracket_wrapped_parser(internal_parser):
    """Build a parser for ``internal_parser`` enclosed in bracket tokens.

    Delegates to ``_wrapped_parser`` using the 'open_bracket' and
    'close_bracket' token types as delimiters.
    """
    opening, closing = 'open_bracket', 'close_bracket'
    return _wrapped_parser(opening, closing, internal_parser)
def _parenthese_wrapped_parser(internal_parser):
    """Build a parser for ``internal_parser`` enclosed in parenthesis tokens.

    Delegates to ``_wrapped_parser`` using the 'open_parenthese' and
    'close_parenthese' token types as delimiters.
    """
    opening, closing = 'open_parenthese', 'close_parenthese'
    return _wrapped_parser(opening, closing, internal_parser)
def _parenthesized_expression_parser(index, tokens):
    """Parse an expression enclosed in parentheses starting at ``index``.

    The wrapped parser is built on every call, so ``_expression_parser`` is
    looked up when this function runs rather than when it is defined.

    Returns whatever the wrapped parser returns for ``(index, tokens)``.
    """
    wrapped_expression_parser = _parenthese_wrapped_parser(_expression_parser)
    return wrapped_expression_parser(index, tokens)
# Parser for a `symbol : expression` pair: checks for a 'symbol' token (keeping
# its match text), then a 'colon' token, then runs _expression_parser, and
# builds a FurSymbolExpressionPair carrying the parsed expression.
186 def symbol_expression_pair_parser(index, tokens):
187 failure = (False, index, None)
189 if tokens[index].type == 'symbol':
190 symbol = tokens[index].match
195 if tokens[index].type == 'colon':
200 success, index, expression = _expression_parser(index, tokens)
208 FurSymbolExpressionPair(
210 expression=expression,
# Parser for a structure literal: a parenthesis-wrapped, comma-separated list
# of symbol/expression pairs, used to build a FurStructureLiteralExpression.
# The combined parser is constructed on each call.
214 def _structure_literal_parser(index, tokens):
215 success, index, result = _parenthese_wrapped_parser(_comma_separated_list_parser(symbol_expression_pair_parser))(index, tokens)
219 FurStructureLiteralExpression(
# Parser for a lambda expression. The visible checks follow this token order:
# keyword 'lambda', 'open_parenthese', comma-separated symbols,
# 'close_parenthese', 'do', zero or more statements, keyword 'end'.
# Returns (True, index, FurLambdaExpression(...)) with argument_name_list as a
# tuple of the parsed symbols' names.
# Raises Exception with an 'Expected "("' or 'Expected ")"' message.
224 def _lambda_expression_parser(index, tokens):
225 failure = (False, index, None)
227 if tokens[index].type == 'keyword' and tokens[index].match == 'lambda':
232 if tokens[index].type == 'open_parenthese':
235 raise Exception('Expected "(", found "{}" on line {}'.format(
237 tokens[index].metadata.line,
240 success, index, argument_name_list = _comma_separated_list_parser(_symbol_expression_parser)(
245 if tokens[index].type == 'close_parenthese':
248 raise Exception('Expected ")", found "{}" on line {}'.format(
# NOTE(review): 'do' is matched on text only, while 'lambda' and 'end' also
# require type == 'keyword' — confirm this asymmetry is intended.
253 if tokens[index].match == 'do':
258 success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
# Skip newline tokens between the last statement and the closing 'end'.
260 _, index, _ = consume_newlines(index, tokens)
262 if tokens[index].type == 'keyword' and tokens[index].match == 'end':
267 return True, index, FurLambdaExpression(
268 argument_name_list=tuple(an.symbol for an in argument_name_list),
269 statement_list=statement_list,
# Parser for a list literal: a bracket-wrapped, comma-separated expression
# list whose items become FurListLiteralExpression.item_expression_list.
# The visible return passes `success` through rather than a literal True.
274 def _list_literal_expression_parser(index, tokens):
275 failure = (False, index, None)
277 success, index, item_expression_list = _bracket_wrapped_parser(_comma_separated_expression_list_parser)(index, tokens)
280 return success, index, FurListLiteralExpression(
281 item_expression_list=item_expression_list,
# Literal-level expression parser: the candidate parsers are listed in the
# order below (presumably handed to _or_parser — confirm).
# NOTE(review): the order appears significant — list-item and function-call
# parsers come before the bare symbol parser they both start with.
286 def _literal_level_expression_parser(index, tokens):
288 _list_item_expression_parser,
289 _function_call_expression_parser,
290 _parenthesized_expression_parser,
291 _integer_literal_expression_parser,
292 _string_literal_expression_parser,
293 _list_literal_expression_parser,
294 _lambda_expression_parser,
295 _symbol_expression_parser,
296 _structure_literal_parser,
# Dot-level expression parser: uses _left_recursive_infix_operator_parser with
# 'period' tokens as the operator and literal-level expressions as operands.
299 def _dot_expression_parser(index, tokens):
300 return _left_recursive_infix_operator_parser(
301 lambda token: token.type == 'period',
302 _literal_level_expression_parser,
# Parser for unary negation: checks for a token whose match text is '-',
# parses a dot-level expression starting at the next token, and returns
# (True, index, FurNegationExpression(metadata=..., value=...)).
# The metadata is taken from the '-' token itself.
306 def _negation_expression_parser(index, tokens):
307 failure = (False, index, None)
# Only the match text is compared; the token type is not checked here.
309 if tokens[index].match != '-':
312 metadata = tokens[index].metadata
314 success, index, value = _dot_expression_parser(index + 1, tokens)
319 return (True, index, FurNegationExpression(metadata=metadata, value=value))
# Negation-level expression parser: lists the dot-level parser first and the
# unary-negation parser second.
321 def _negation_level_expression_parser(index, tokens):
323 _dot_expression_parser,
324 _negation_expression_parser,
# Combinator for infix operator chains.
#   operator_token_matcher: predicate called on a token; true for an operator.
#   operand_parser: parser for each operand.
#   order: extra value supplied by callers (e.g. 'multiplication_level').
# The inner result_parser parses one operand, then, while the next token
# matches the operator, parses another operand and rebuilds `result` as a
# FurInfixExpression around the operator token. Reassigning `result` on each
# pass is what makes the chain left-nested.
327 def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser, order):
328 def result_parser(index, tokens):
329 failure = (False, index, None)
331 success, index, result = operand_parser(index, tokens)
336 while success and index < len(tokens) and operator_token_matcher(tokens[index]):
# Only attempt the right-hand operand when a token exists after the operator.
339 if index + 1 < len(tokens):
# The right operand's end position goes into try_index, not index.
340 success, try_index, value = operand_parser(index + 1, tokens)
343 result = FurInfixExpression(
344 metadata=tokens[index].metadata,
346 operator=tokens[index].match,
352 return True, index, result
# Multiplication-level expression parser: infix chain of
# 'multiplication_level_operator' tokens over negation-level operands,
# tagged with order 'multiplication_level'.
356 def _multiplication_level_expression_parser(index, tokens):
357 return _left_recursive_infix_operator_parser(
358 lambda token: token.type == 'multiplication_level_operator',
359 _negation_level_expression_parser,
360 'multiplication_level',
# Addition-level expression parser: infix chain of 'addition_level_operator'
# tokens over multiplication-level operands.
363 def _addition_level_expression_parser(index, tokens):
364 return _left_recursive_infix_operator_parser(
365 lambda token: token.type == 'addition_level_operator',
366 _multiplication_level_expression_parser,
# Comparison-level expression parser: infix chain of
# 'comparison_level_operator' tokens over addition-level operands.
370 def _comparison_level_expression_parser(index, tokens):
371 return _left_recursive_infix_operator_parser(
372 lambda token: token.type == 'comparison_level_operator',
373 _addition_level_expression_parser,
# And-level expression parser: infix chain over comparison-level operands.
# The operator is a 'symbol' token whose text is 'and', not a dedicated
# operator token type.
377 def _and_level_expression_parser(index, tokens):
378 return _left_recursive_infix_operator_parser(
379 lambda token: token.type == 'symbol' and token.match == 'and',
380 _comparison_level_expression_parser,
# Or-level expression parser: infix chain over and-level operands.
# The operator is a 'symbol' token whose text is 'or', not a dedicated
# operator token type.
384 def _or_level_expression_parser(index, tokens):
385 return _left_recursive_infix_operator_parser(
386 lambda token: token.type == 'symbol' and token.match == 'or',
387 _and_level_expression_parser,
# Combinator: the inner result_parser parses items with `subparser`, separated
# by 'comma' tokens, and returns (True, index, tuple(items)).
# Newline tokens are skipped before the first item and again inside the comma
# loop.
391 def _comma_separated_list_parser(subparser):
392 def result_parser(index, tokens):
397 _, index, _ = consume_newlines(index, tokens)
# NOTE(review): the subparser is called here without an index < len(tokens)
# check, unlike the call inside the loop below.
399 success, index, item = subparser(index, tokens)
# Empty-list result: success with an empty tuple, reported at start_index.
404 return (True, start_index, ())
406 while success and index < len(tokens) and tokens[index].type == 'comma':
410 _, index, _ = consume_newlines(index, tokens)
412 if index < len(tokens):
# The next item's end position goes into try_index, not index.
413 success, try_index, item = subparser(index, tokens)
419 return True, index, tuple(items)
def _comma_separated_expression_list_parser(index, tokens):
    """Parse a comma-separated list of expressions starting at ``index``.

    The list parser is built on every call, so ``_expression_parser`` is
    looked up when this function runs rather than when it is defined.

    Returns whatever the list parser returns for ``(index, tokens)``.
    """
    expression_list_parser = _comma_separated_list_parser(_expression_parser)
    return expression_list_parser(index, tokens)
# Further node types declared with collections.namedtuple: list-item and
# function-call expressions, the statement kinds, and the top-level program.
426 FurListItemExpression = collections.namedtuple(
427 'FurListItemExpression',
435 FurFunctionCallExpression = collections.namedtuple(
436 'FurFunctionCallExpression',
444 FurExpressionStatement = collections.namedtuple(
445 'FurExpressionStatement',
451 FurAssignmentStatement = collections.namedtuple(
452 'FurAssignmentStatement',
459 FurFunctionDefinitionStatement = collections.namedtuple(
460 'FurFunctionDefinitionStatement',
463 'argument_name_list',
468 FurProgram = collections.namedtuple(
# Parser for list indexing such as `a[0]`, including repeated indexing: a base
# expression (symbol or parenthesized expression) followed by bracket-wrapped
# index expressions. Each successful index parse wraps the result so far in a
# new FurListItemExpression. Returns (True, index, list_expression).
475 def _list_item_expression_parser(index, tokens):
476 failure = (False, index, None)
478 # We have to be careful what expressions we add here. Otherwise expressions
479 # like "a + b[0]" become ambiguous to the parser.
480 success, index, list_expression = _or_parser(
481 _symbol_expression_parser,
482 _parenthesized_expression_parser,
# Metadata is read from the token at the current index before the bracketed
# index expression is parsed.
488 metadata = tokens[index].metadata
490 success, index, index_expression = _bracket_wrapped_parser(_expression_parser)(
498 while success and index < len(tokens):
499 # "list_expression" is actually the full list item expression if the next parse attempt doesn't succeed
500 # We can't give this a better name without a bunch of checks, however.
501 list_expression = FurListItemExpression(
502 list_expression=list_expression,
504 index_expression=index_expression,
507 metadata = tokens[index].metadata
509 success, index, index_expression = _bracket_wrapped_parser(_expression_parser)(
514 return True, index, list_expression
# Parser for function calls such as `f(x)`, including repeated calls: a callee
# (symbol or parenthesized expression) followed by parenthesis-wrapped,
# comma-separated argument lists. Each successful argument-list parse wraps
# the result so far in a new FurFunctionCallExpression.
# Returns (True, index, function).
516 def _function_call_expression_parser(index, tokens):
517 failure = (False, index, None)
519 # We have to be careful what expressions we add here. Otherwise expressions
520 # like "a + b()" become ambiguous to the parser.
521 success, index, function = _or_parser(
522 _symbol_expression_parser,
523 _parenthesized_expression_parser,
# Metadata is read from the token at the current index before the argument
# list is parsed.
529 metadata = tokens[index].metadata
531 success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)(
539 while success and index < len(tokens):
540 # "function" is actually the full function call if the next parse attempt doesn't succeed
541 # We can't give this a better name without a bunch of checks, however.
542 function = FurFunctionCallExpression(
548 metadata = tokens[index].metadata
550 success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)(
555 return True, index, function
# Parser for an if expression. The visible checks follow this token order:
# 'if', a condition, 'do', zero or more statements, optionally 'else' with
# more statements, then 'end'. else_statement_list is () when no else branch
# is parsed.
# Raises Exception when the condition, 'do', or 'end' is missing.
# NOTE(review): the error messages below read tokens[index].line, whereas
# _lambda_expression_parser and _assignment_statement_parser read
# tokens[index].metadata.line. Confirm the token type exposes `.line`;
# otherwise these raises fail with AttributeError instead of the intended
# message.
557 def _if_expression_parser(index, tokens):
558 failure = (False, index, None)
# 'if', 'do', 'else' and 'end' are matched on text only; type is not checked.
560 if tokens[index].match == 'if':
# The condition is parsed at or-level, not with the top-level
# _expression_parser.
565 success, index, condition_expression = _or_level_expression_parser(index, tokens)
568 raise Exception('Expected condition after "if" on line {}'.format(tokens[index].line))
570 if tokens[index].match == 'do':
573 raise Exception('Expected "do" after "if" on line {}'.format(tokens[index].line))
576 success, index, if_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
577 _, index, _ = consume_newlines(index, tokens)
579 if tokens[index].match == 'else':
581 success, index, else_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
582 _, index, _ = consume_newlines(index, tokens)
584 else_statement_list = ()
586 if tokens[index].match == 'end':
589 raise Exception('Expected "end" after "if" on line {}'.format(tokens[index].line))
595 condition_expression=condition_expression,
596 if_statement_list=if_statement_list,
597 else_statement_list=else_statement_list,
# Top-level expression parser: the or-level parser is listed first, the
# if-expression parser second.
601 _expression_parser = _or_parser(
602 _or_level_expression_parser,
603 _if_expression_parser, # This should always be at the top level
# Parser for an expression used as a statement: runs _expression_parser and
# returns (True, index, FurExpressionStatement(expression=expression)).
606 def _expression_statement_parser(index, tokens):
607 failure = (False, index, None)
609 success, index, expression = _expression_parser(index, tokens)
614 return (True, index, FurExpressionStatement(expression=expression))
# Names checked by _assignment_statement_parser: an assignment whose target is
# in this set produces a 'Trying to assign to builtin' message.
616 BUILTINS = {'print', 'pow'}
# Parser for `target = expression`: checks for a 'symbol' token (the target),
# then an 'assignment_operator' token, then parses the expression after it.
# Returns (True, index, FurAssignmentStatement(target=..., expression=...)).
# Targets found in BUILTINS produce a 'Trying to assign to builtin' message;
# a missing right-hand side produces an 'Expected expression after assignment
# operator' message.
618 def _assignment_statement_parser(index, tokens):
619 failure = (False, index, None)
621 if tokens[index].type == 'symbol':
622 target = tokens[index].match
# Line number kept for the builtin-assignment error message below.
623 target_assignment_line = tokens[index].metadata.line
630 if tokens[index].type == 'assignment_operator':
631 if target in BUILTINS:
633 'Trying to assign to builtin "{}" on line {}'.format(target, target_assignment_line),
# Operator position kept so the error below can refer to the operator's token.
635 assignment_operator_index = index
639 success, index, expression = _expression_parser(index + 1, tokens)
643 'Expected expression after assignment operator on line {}'.format(
# NOTE(review): this reads `.line` directly on the token, while the target
# line above is read via `.metadata.line`. Confirm the token exposes `.line`;
# otherwise this fails with AttributeError instead of the intended message.
644 tokens[assignment_operator_index].line
648 return True, index, FurAssignmentStatement(target=target, expression=expression)
# Parser for a function definition. The visible checks follow this token
# order: keyword 'def', a symbol (the function name), 'open_parenthese',
# comma-separated symbols, 'close_parenthese', 'do', zero or more statements,
# keyword 'end'.
# Returns (True, index, FurFunctionDefinitionStatement(...)) with
# argument_name_list as a tuple of the parsed symbols' names.
# Raises Exception with an 'Expected function name', 'Expected "("' or
# 'Expected ")"' message.
650 def _function_definition_statement_parser(index, tokens):
651 failure = (False, index, None)
653 if tokens[index].type == 'keyword' and tokens[index].match == 'def':
658 if tokens[index].type == 'symbol':
659 name = tokens[index].match
662 raise Exception('Expected function name, found "{}" on line {}'.format(
667 if tokens[index].type == 'open_parenthese':
670 raise Exception('Expected "(", found "{}" on line {}'.format(
675 success, index, argument_name_list = _comma_separated_list_parser(_symbol_expression_parser)(
680 if tokens[index].type == 'close_parenthese':
683 raise Exception('Expected ")", found "{}" on line {}'.format(
# NOTE(review): 'do' is matched on text only, while 'def' and 'end' also
# require type == 'keyword' — confirm this asymmetry is intended.
688 if tokens[index].match == 'do':
693 success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
# Skip newline tokens between the last statement and the closing 'end'.
695 _, index, _ = consume_newlines(index, tokens)
697 if tokens[index].type == 'keyword' and tokens[index].match == 'end':
702 return True, index, FurFunctionDefinitionStatement(
704 argument_name_list=tuple(an.symbol for an in argument_name_list),
705 statement_list=statement_list,
# Parser for one statement: skips leading newline tokens, fails with
# (False, index, None) when no tokens remain, and otherwise lists the
# assignment, expression-statement and function-definition parsers in that
# order.
708 def _statement_parser(index, tokens):
709 _, index, _ = consume_newlines(index, tokens)
# End of input after skipping newlines is a failed parse, not an error.
711 if index == len(tokens):
712 return (False, index, None)
715 _assignment_statement_parser,
716 _expression_statement_parser,
717 _function_definition_statement_parser,
def _program_formatter(statement_list):
    """Wrap the parsed statements in a ``FurProgram`` node.

    ``statement_list`` is passed through unchanged as the node's
    ``statement_list`` field.
    """
    program = FurProgram(statement_list=statement_list)
    return program
# Whole-program parser: zero or more statements, with the collected results
# handed to _program_formatter.
723 _program_parser = _zero_or_more_parser(_program_formatter, _statement_parser)
# Driver: runs `parser` from index 0 over `tokens`.
# Raises Exception('Unable to parse token ...') when the parser stops before
# the end of the token list; an 'Unable to parse' Exception is also raised
# (presumably when the parser reports failure — confirm against the full
# branch).
725 def _parse(parser, tokens):
726 success, index, result = parser(0, tokens)
# Leftover tokens mean the parser could not consume the whole input.
728 if index < len(tokens):
729 raise Exception('Unable to parse token {}'.format(tokens[index]))
734 raise Exception('Unable to parse')
737 return _parse(_program_parser, tokens)
739 if __name__ == '__main__':
744 class FurStringLiteralExpressionParserTests(unittest.TestCase):
745 def test_parses_single_quoted_string_literal(self):
747 _string_literal_expression_parser(0, tokenization.tokenize("'Hello, world'")),
751 FurStringLiteralExpression(string='Hello, world'),
755 class FurFunctionCallExpressionParserTests(unittest.TestCase):
756 def test_parses_function_with_string_literal_argument(self):
758 _function_call_expression_parser(0, tokenization.tokenize("print('Hello, world')")),
762 FurFunctionCallExpression(
764 arguments=(FurStringLiteralExpression(string='Hello, world'),),