3 def consume_newlines(index, tokens):
4 while index < len(tokens) and tokens[index].type == 'newline':
7 return True, index, None
9 def _or_parser(*parsers):
10 def result_parser(index, tokens):
11 failure = (False, index, None)
13 for parser in parsers:
14 success, index, value = parser(index, tokens)
17 return (success, index, value)
23 def _zero_or_more_parser(formatter, parser):
24 def result_parser(index, tokens):
27 while index < len(tokens):
28 success, index, value = parser(index, tokens)
35 return (True, index, formatter(values))
39 FurIntegerLiteralExpression = collections.namedtuple(
40 'FurIntegerLiteralExpression',
46 FurStringLiteralExpression = collections.namedtuple(
47 'FurStringLiteralExpression',
53 FurSymbolExpression = collections.namedtuple(
54 'FurSymbolExpression',
61 FurNegationExpression = collections.namedtuple(
62 'FurNegationExpression',
69 FurInfixExpression = collections.namedtuple(
80 FurListLiteralExpression = collections.namedtuple(
81 'FurListLiteralExpression',
83 'item_expression_list',
87 FurIfExpression = collections.namedtuple(
90 'condition_expression',
92 'else_statement_list',
96 FurSymbolExpressionPair = collections.namedtuple(
97 'FurSymbolExpressionPair',
104 FurStructureLiteralExpression = collections.namedtuple(
105 'FurStructureLiteralExpression',
111 def _integer_literal_expression_parser(index, tokens):
112 failure = (False, index, None)
114 if tokens[index].type != 'integer_literal':
116 value = int(tokens[index].match)
119 return True, index, FurIntegerLiteralExpression(integer=value)
121 def _string_literal_expression_parser(index, tokens):
122 if tokens[index].type == 'double_quoted_string_literal':
123 return (True, index + 1, FurStringLiteralExpression(string=tokens[index].match[1:-1]))
125 if tokens[index].type == 'single_quoted_string_literal':
126 return (True, index + 1, FurStringLiteralExpression(string=tokens[index].match[1:-1]))
128 return (False, index, None)
130 def _symbol_expression_parser(index, tokens):
131 if tokens[index].type == 'symbol':
136 metadata=tokens[index].metadata,
137 symbol=tokens[index].match,
141 return (False, index, None)
143 def _wrapped_parser(open_token, close_token, internal_parser):
144 def result_parser(index, tokens):
145 failure = (False, index, None)
147 if tokens[index].type == open_token:
152 success, index, internal = internal_parser(index, tokens)
156 if tokens[index].type == close_token:
159 # TODO Put the actual expected character in the error message
160 raise Exception('Expected closing token on line {}, found "{}"'.format(
165 return True, index, internal
def _bracket_wrapped_parser(internal_parser):
    """Return ``internal_parser`` wrapped between bracket tokens.

    Delegates to ``_wrapped_parser`` with ``'open_bracket'`` as the opening
    token type and ``'close_bracket'`` as the closing token type.
    """
    return _wrapped_parser(
        open_token='open_bracket',
        close_token='close_bracket',
        internal_parser=internal_parser,
    )
def _parenthese_wrapped_parser(internal_parser):
    """Return ``internal_parser`` wrapped between parenthesis tokens.

    Delegates to ``_wrapped_parser`` with ``'open_parenthese'`` as the opening
    token type and ``'close_parenthese'`` as the closing token type.
    """
    return _wrapped_parser(
        open_token='open_parenthese',
        close_token='close_parenthese',
        internal_parser=internal_parser,
    )
def _parenthesized_expression_parser(index, tokens):
    """Parse an expression enclosed in parentheses, starting at ``index``.

    ``_expression_parser`` is looked up when this function runs, so it may be
    defined later in the module than this function.
    """
    parenthesized = _parenthese_wrapped_parser(_expression_parser)
    return parenthesized(index, tokens)
178 def symbol_expression_pair_parser(index, tokens):
179 failure = (False, index, None)
181 if tokens[index].type == 'symbol':
182 symbol = tokens[index].match
187 if tokens[index].type == 'colon':
192 success, index, expression = _expression_parser(index, tokens)
200 FurSymbolExpressionPair(
202 expression=expression,
206 def _structure_literal_parser(index, tokens):
207 success, index, result = _parenthese_wrapped_parser(_comma_separated_list_parser(symbol_expression_pair_parser))(index, tokens)
211 FurStructureLiteralExpression(
216 def _list_literal_expression_parser(index, tokens):
217 failure = (False, index, None)
219 success, index, item_expression_list = _bracket_wrapped_parser(_comma_separated_expression_list_parser)(index, tokens)
222 return success, index, FurListLiteralExpression(
223 item_expression_list=item_expression_list,
228 def _literal_level_expression_parser(index, tokens):
230 _list_item_expression_parser,
231 _function_call_expression_parser,
232 _parenthesized_expression_parser,
233 _integer_literal_expression_parser,
234 _string_literal_expression_parser,
235 _list_literal_expression_parser,
236 _symbol_expression_parser,
237 _structure_literal_parser,
240 def _dot_expression_parser(index, tokens):
241 return _left_recursive_infix_operator_parser(
242 lambda token: token.type == 'period',
243 _literal_level_expression_parser,
247 def _negation_expression_parser(index, tokens):
248 failure = (False, index, None)
250 if tokens[index].match != '-':
253 metadata = tokens[index].metadata
255 success, index, value = _dot_expression_parser(index + 1, tokens)
260 return (True, index, FurNegationExpression(metadata=metadata, value=value))
262 def _negation_level_expression_parser(index, tokens):
264 _dot_expression_parser,
265 _negation_expression_parser,
268 def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser, order):
269 def result_parser(index, tokens):
270 failure = (False, index, None)
272 success, index, result = operand_parser(index, tokens)
277 while success and index < len(tokens) and operator_token_matcher(tokens[index]):
280 if index + 1 < len(tokens):
281 success, try_index, value = operand_parser(index + 1, tokens)
284 result = FurInfixExpression(
285 metadata=tokens[index].metadata,
287 operator=tokens[index].match,
293 return True, index, result
297 def _multiplication_level_expression_parser(index, tokens):
298 return _left_recursive_infix_operator_parser(
299 lambda token: token.type == 'multiplication_level_operator',
300 _negation_level_expression_parser,
301 'multiplication_level',
304 def _addition_level_expression_parser(index, tokens):
305 return _left_recursive_infix_operator_parser(
306 lambda token: token.type == 'addition_level_operator',
307 _multiplication_level_expression_parser,
311 def _comparison_level_expression_parser(index, tokens):
312 return _left_recursive_infix_operator_parser(
313 lambda token: token.type == 'comparison_level_operator',
314 _addition_level_expression_parser,
318 def _and_level_expression_parser(index, tokens):
319 return _left_recursive_infix_operator_parser(
320 lambda token: token.type == 'symbol' and token.match == 'and',
321 _comparison_level_expression_parser,
325 def _or_level_expression_parser(index, tokens):
326 return _left_recursive_infix_operator_parser(
327 lambda token: token.type == 'symbol' and token.match == 'or',
328 _and_level_expression_parser,
332 def _comma_separated_list_parser(subparser):
333 def result_parser(index, tokens):
338 _, index, _ = consume_newlines(index, tokens)
340 success, index, item = subparser(index, tokens)
345 return (True, start_index, ())
347 while success and index < len(tokens) and tokens[index].type == 'comma':
351 _, index, _ = consume_newlines(index, tokens)
353 if index < len(tokens):
354 success, try_index, item = subparser(index, tokens)
360 return True, index, tuple(items)
def _comma_separated_expression_list_parser(index, tokens):
    """Parse a comma-separated run of expressions, starting at ``index``.

    Builds the list parser from ``_comma_separated_list_parser`` with
    ``_expression_parser`` as the per-item parser, then applies it.
    """
    expression_list_parser = _comma_separated_list_parser(_expression_parser)
    return expression_list_parser(index, tokens)
367 FurListItemExpression = collections.namedtuple(
368 'FurListItemExpression',
376 FurFunctionCallExpression = collections.namedtuple(
377 'FurFunctionCallExpression',
385 FurExpressionStatement = collections.namedtuple(
386 'FurExpressionStatement',
392 FurAssignmentStatement = collections.namedtuple(
393 'FurAssignmentStatement',
400 FurFunctionDefinitionStatement = collections.namedtuple(
401 'FurFunctionDefinitionStatement',
404 'argument_name_list',
409 FurProgram = collections.namedtuple(
416 def _list_item_expression_parser(index, tokens):
417 failure = (False, index, None)
419 # We have to be careful what expressions we add here. Otherwise expressions
420 # like "a + b[0]" become ambiguous to the parser.
421 success, index, list_expression = _or_parser(
422 _symbol_expression_parser,
423 _parenthesized_expression_parser,
429 metadata = tokens[index].metadata
431 success, index, index_expression = _bracket_wrapped_parser(_expression_parser)(
439 while success and index < len(tokens):
440 # "list_expression" is actually the full list item expression if the next parse attempt doesn't succeed
441 # We can't give this a better name without a bunch of checks, however.
442 list_expression = FurListItemExpression(
443 list_expression=list_expression,
445 index_expression=index_expression,
448 metadata = tokens[index].metadata
450 success, index, index_expression = _bracket_wrapped_parser(_expression_parser)(
455 return True, index, list_expression
457 def _function_call_expression_parser(index, tokens):
458 failure = (False, index, None)
460 # We have to be careful what expressions we add here. Otherwise expressions
461 # like "a + b()" become ambiguous to the parser.
462 success, index, function = _or_parser(
463 _symbol_expression_parser,
464 _parenthesized_expression_parser,
470 metadata = tokens[index].metadata
472 success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)(
480 while success and index < len(tokens):
481 # "function" is actually the full function call if the next parse attempt doesn't succeed
482 # We can't give this a better name without a bunch of checks, however.
483 function = FurFunctionCallExpression(
489 metadata = tokens[index].metadata
491 success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)(
496 return True, index, function
498 def _if_expression_parser(index, tokens):
499 failure = (False, index, None)
501 if tokens[index].match == 'if':
506 success, index, condition_expression = _or_level_expression_parser(index, tokens)
509 raise Exception('Expected condition after "if" on line {}'.format(tokens[index].line))
511 if tokens[index].match == 'do':
514 raise Exception('Expected "do" after "if" on line {}'.format(tokens[index].line))
517 success, index, if_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
518 _, index, _ = consume_newlines(index, tokens)
520 if tokens[index].match == 'else':
522 success, index, else_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
523 _, index, _ = consume_newlines(index, tokens)
525 else_statement_list = ()
527 if tokens[index].match == 'end':
530 raise Exception('Expected "end" after "if" on line {}'.format(tokens[index].line))
536 condition_expression=condition_expression,
537 if_statement_list=if_statement_list,
538 else_statement_list=else_statement_list,
542 _expression_parser = _or_parser(
543 _or_level_expression_parser,
544 _if_expression_parser, # This should always be at the top level
547 def _expression_statement_parser(index, tokens):
548 failure = (False, index, None)
550 success, index, expression = _expression_parser(index, tokens)
555 return (True, index, FurExpressionStatement(expression=expression))
# Names checked by the assignment-statement parser: a target found in this
# set is reported as an attempt to assign to a builtin.
BUILTINS = {
    'pow',
    'print',
}
559 def _assignment_statement_parser(index, tokens):
560 failure = (False, index, None)
562 if tokens[index].type == 'symbol':
563 target = tokens[index].match
564 target_assignment_line = tokens[index].metadata.line
571 if tokens[index].type == 'assignment_operator':
572 if target in BUILTINS:
574 'Trying to assign to builtin "{}" on line {}'.format(target, target_assignment_line),
576 assignment_operator_index = index
580 success, index, expression = _expression_parser(index + 1, tokens)
584 'Expected expression after assignment operator on line {}'.format(
585 tokens[assignment_operator_index].line
589 return True, index, FurAssignmentStatement(target=target, expression=expression)
591 def _function_definition_statement_parser(index, tokens):
592 failure = (False, index, None)
594 if tokens[index].type == 'keyword' and tokens[index].match == 'def':
599 if tokens[index].type == 'symbol':
600 name = tokens[index].match
603 raise Exception('Expected function name, found "{}" on line {}'.format(
608 if tokens[index].type == 'open_parenthese':
611 raise Exception('Expected "(", found "{}" on line {}'.format(
616 success, index, argument_name_list = _comma_separated_list_parser(_symbol_expression_parser)(
621 if tokens[index].type == 'close_parenthese':
624 raise Exception('Expected ")", found "{}" on line {}'.format(
629 if tokens[index].match == 'do':
634 success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
636 _, index, _ = consume_newlines(index, tokens)
638 if tokens[index].type == 'keyword' and tokens[index].match == 'end':
643 return True, index, FurFunctionDefinitionStatement(
645 argument_name_list=tuple(an.symbol for an in argument_name_list),
646 statement_list=statement_list,
649 def _statement_parser(index, tokens):
650 _, index, _ = consume_newlines(index, tokens)
652 if index == len(tokens):
653 return (False, index, None)
656 _assignment_statement_parser,
657 _expression_statement_parser,
658 _function_definition_statement_parser,
def _program_formatter(statement_list):
    """Wrap ``statement_list`` in a ``FurProgram`` node and return it."""
    program = FurProgram(statement_list=statement_list)
    return program
# Top-level parser: repeatedly applies the statement parser and hands the
# collected results to ``_program_formatter``.
_program_parser = _zero_or_more_parser(
    formatter=_program_formatter,
    parser=_statement_parser,
)
666 def _parse(parser, tokens):
667 success, index, result = parser(0, tokens)
669 if index < len(tokens):
670 raise Exception('Unable to parse token {}'.format(tokens[index]))
675 raise Exception('Unable to parse')
678 return _parse(_program_parser, tokens)
680 if __name__ == '__main__':
685 class FurStringLiteralExpressionParserTests(unittest.TestCase):
686 def test_parses_single_quoted_string_literal(self):
688 _string_literal_expression_parser(0, tokenization.tokenize("'Hello, world'")),
692 FurStringLiteralExpression(string='Hello, world'),
696 class FurFunctionCallExpressionParserTests(unittest.TestCase):
697 def test_parses_function_with_string_literal_argument(self):
699 _function_call_expression_parser(0, tokenization.tokenize("print('Hello, world')")),
703 FurFunctionCallExpression(
705 arguments=(FurStringLiteralExpression(string='Hello, world'),),