3 def consume_newlines(index, tokens):
4 while index < len(tokens) and tokens[index].type == 'newline':
7 return True, index, None
9 def _or_parser(*parsers):
10 def result_parser(index, tokens):
11 failure = (False, index, None)
13 for parser in parsers:
14 success, index, value = parser(index, tokens)
17 return (success, index, value)
23 def _zero_or_more_parser(formatter, parser):
24 def result_parser(index, tokens):
27 while index < len(tokens):
28 success, index, value = parser(index, tokens)
35 return (True, index, formatter(values))
39 FurIntegerLiteralExpression = collections.namedtuple(
40 'FurIntegerLiteralExpression',
46 FurStringLiteralExpression = collections.namedtuple(
47 'FurStringLiteralExpression',
53 FurSymbolExpression = collections.namedtuple(
54 'FurSymbolExpression',
60 FurNegationExpression = collections.namedtuple(
61 'FurNegationExpression',
67 FurInfixExpression = collections.namedtuple(
77 FurListLiteralExpression = collections.namedtuple(
78 'FurListLiteralExpression',
80 'item_expression_list',
84 FurIfExpression = collections.namedtuple(
87 'condition_expression',
89 'else_statement_list',
93 FurSymbolExpressionPair = collections.namedtuple(
94 'FurSymbolExpressionPair',
101 FurStructureLiteralExpression = collections.namedtuple(
102 'FurStructureLiteralExpression',
108 def _integer_literal_expression_parser(index, tokens):
109 failure = (False, index, None)
111 if tokens[index].type != 'integer_literal':
113 value = int(tokens[index].match)
116 return True, index, FurIntegerLiteralExpression(integer=value)
def _string_literal_expression_parser(index, tokens):
    """Parse a single- or double-quoted string literal token.

    Returns a ``(success, index, value)`` triple. On a match, ``index`` points
    just past the literal and ``value`` is a ``FurStringLiteralExpression``
    holding the token text with its first and last characters (the quotes)
    removed. Otherwise the triple is ``(False, index, None)`` with ``index``
    unchanged, including when ``index`` is at or past the end of ``tokens``.
    """
    failure = (False, index, None)

    # Running out of tokens is an ordinary parse failure, not an IndexError.
    if index >= len(tokens):
        return failure

    token = tokens[index]

    if token.type not in ('double_quoted_string_literal', 'single_quoted_string_literal'):
        return failure

    # Both quote styles are handled alike: drop one delimiter from each end.
    return (True, index + 1, FurStringLiteralExpression(string=token.match[1:-1]))
def _symbol_expression_parser(index, tokens):
    """Parse a single ``symbol`` token.

    Returns a ``(success, index, value)`` triple. On a match, ``index`` points
    just past the symbol and ``value`` is a ``FurSymbolExpression`` carrying
    the token's matched text. Otherwise the triple is ``(False, index, None)``
    with ``index`` unchanged, including when ``index`` is at or past the end
    of ``tokens``.
    """
    failure = (False, index, None)

    # Running out of tokens is an ordinary parse failure, not an IndexError.
    if index >= len(tokens):
        return failure

    token = tokens[index]

    if token.type != 'symbol':
        return failure

    return (True, index + 1, FurSymbolExpression(symbol=token.match))
133 def _wrapped_parser(open_token, close_token, internal_parser):
134 def result_parser(index, tokens):
135 failure = (False, index, None)
137 if tokens[index].type == open_token:
142 success, index, internal = internal_parser(index, tokens)
146 if tokens[index].type == close_token:
149 # TODO Put the actual expected character in the error message
150 raise Exception('Expected closing token on line {}, found "{}"'.format(
155 return True, index, internal
def _bracket_wrapped_parser(internal_parser):
    """Build a parser that applies ``internal_parser`` between bracket tokens.

    Delegates to ``_wrapped_parser`` with the ``open_bracket`` and
    ``close_bracket`` token types.
    """
    return _wrapped_parser(
        open_token='open_bracket',
        close_token='close_bracket',
        internal_parser=internal_parser,
    )
def _parenthese_wrapped_parser(internal_parser):
    """Build a parser that applies ``internal_parser`` between parentheses.

    Delegates to ``_wrapped_parser`` with the ``open_parenthese`` and
    ``close_parenthese`` token types.
    """
    return _wrapped_parser(
        open_token='open_parenthese',
        close_token='close_parenthese',
        internal_parser=internal_parser,
    )
def _parenthesized_expression_parser(index, tokens):
    """Parse an expression enclosed in parentheses.

    Returns whatever the parenthese-wrapped expression parser returns for
    ``(index, tokens)``.
    """
    # `_expression_parser` is assigned further down the module, so the wrapped
    # parser is built here, at call time, rather than at definition time.
    wrapped_expression_parser = _parenthese_wrapped_parser(_expression_parser)
    return wrapped_expression_parser(index, tokens)
168 def symbol_expression_pair_parser(index, tokens):
169 failure = (False, index, None)
171 if tokens[index].type == 'symbol':
172 symbol = tokens[index].match
177 if tokens[index].type == 'colon':
182 success, index, expression = _expression_parser(index, tokens)
190 FurSymbolExpressionPair(
192 expression=expression,
196 def _structure_literal_parser(index, tokens):
197 success, index, result = _parenthese_wrapped_parser(_comma_separated_list_parser(symbol_expression_pair_parser))(index, tokens)
201 FurStructureLiteralExpression(
206 def _list_literal_expression_parser(index, tokens):
207 failure = (False, index, None)
209 success, index, item_expression_list = _bracket_wrapped_parser(_comma_separated_expression_list_parser)(index, tokens)
212 return success, index, FurListLiteralExpression(
213 item_expression_list=item_expression_list,
218 def _literal_level_expression_parser(index, tokens):
220 _list_item_expression_parser,
221 _function_call_expression_parser,
222 _parenthesized_expression_parser,
223 _integer_literal_expression_parser,
224 _string_literal_expression_parser,
225 _list_literal_expression_parser,
226 _symbol_expression_parser,
227 _structure_literal_parser,
230 def _dot_expression_parser(index, tokens):
231 return _left_recursive_infix_operator_parser(
232 lambda token: token.type == 'period',
233 _literal_level_expression_parser,
237 def _negation_expression_parser(index, tokens):
238 failure = (False, index, None)
240 if tokens[index].match != '-':
243 success, index, value = _dot_expression_parser(index + 1, tokens)
248 return (True, index, FurNegationExpression(value=value))
250 def _negation_level_expression_parser(index, tokens):
252 _dot_expression_parser,
253 _negation_expression_parser,
256 def _left_recursive_infix_operator_parser(operator_token_matcher, operand_parser, order):
257 def result_parser(index, tokens):
258 failure = (False, index, None)
260 success, index, result = operand_parser(index, tokens)
265 while success and index < len(tokens) and operator_token_matcher(tokens[index]):
268 if index + 1 < len(tokens):
269 success, try_index, value = operand_parser(index + 1, tokens)
272 result = FurInfixExpression(
274 operator=tokens[index].match,
280 return True, index, result
284 def _multiplication_level_expression_parser(index, tokens):
285 return _left_recursive_infix_operator_parser(
286 lambda token: token.type == 'multiplication_level_operator',
287 _negation_level_expression_parser,
288 'multiplication_level',
291 def _addition_level_expression_parser(index, tokens):
292 return _left_recursive_infix_operator_parser(
293 lambda token: token.type == 'addition_level_operator',
294 _multiplication_level_expression_parser,
298 def _comparison_level_expression_parser(index, tokens):
299 return _left_recursive_infix_operator_parser(
300 lambda token: token.type == 'comparison_level_operator',
301 _addition_level_expression_parser,
305 def _and_level_expression_parser(index, tokens):
306 return _left_recursive_infix_operator_parser(
307 lambda token: token.type == 'symbol' and token.match == 'and',
308 _comparison_level_expression_parser,
312 def _or_level_expression_parser(index, tokens):
313 return _left_recursive_infix_operator_parser(
314 lambda token: token.type == 'symbol' and token.match == 'or',
315 _and_level_expression_parser,
319 def _comma_separated_list_parser(subparser):
320 def result_parser(index, tokens):
325 _, index, _ = consume_newlines(index, tokens)
327 success, index, item = subparser(index, tokens)
332 return (True, start_index, ())
334 while success and index < len(tokens) and tokens[index].type == 'comma':
338 _, index, _ = consume_newlines(index, tokens)
340 if index < len(tokens):
341 success, try_index, item = subparser(index, tokens)
347 return True, index, tuple(items)
def _comma_separated_expression_list_parser(index, tokens):
    """Parse a comma-separated list of expressions.

    Returns whatever ``_comma_separated_list_parser`` produces when its item
    parser is ``_expression_parser``.
    """
    # Built per call so that `_expression_parser` is resolved at call time.
    expression_list_parser = _comma_separated_list_parser(_expression_parser)
    return expression_list_parser(index, tokens)
354 FurListItemExpression = collections.namedtuple(
355 'FurListItemExpression',
362 FurFunctionCallExpression = collections.namedtuple(
363 'FurFunctionCallExpression',
370 FurExpressionStatement = collections.namedtuple(
371 'FurExpressionStatement',
377 FurAssignmentStatement = collections.namedtuple(
378 'FurAssignmentStatement',
385 FurFunctionDefinitionStatement = collections.namedtuple(
386 'FurFunctionDefinitionStatement',
389 'argument_name_list',
394 FurProgram = collections.namedtuple(
401 def _list_item_expression_parser(index, tokens):
402 failure = (False, index, None)
404 # We have to be careful what expressions we add here. Otherwise expressions
405 # like "a + b[0]" become ambiguous to the parser.
406 success, index, list_expression = _or_parser(
407 _symbol_expression_parser,
408 _parenthesized_expression_parser,
414 success, index, index_expression = _bracket_wrapped_parser(_expression_parser)(
422 while success and index < len(tokens):
423 # "list_expression" is actually the full list item expression if the next parse attempt doesn't succeed
424 # We can't give this a better name without a bunch of checks, however.
425 list_expression = FurListItemExpression(
426 list_expression=list_expression,
427 index_expression=index_expression,
430 success, index, index_expression = _bracket_wrapped_parser(_expression_parser)(
435 return True, index, list_expression
437 def _function_call_expression_parser(index, tokens):
438 failure = (False, index, None)
440 # We have to be careful what expressions we add here. Otherwise expressions
441 # like "a + b()" become ambiguous to the parser.
442 success, index, function = _or_parser(
443 _symbol_expression_parser,
444 _parenthesized_expression_parser,
450 success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)(
458 while success and index < len(tokens):
459 # "function" is actually the full function call if the next parse attempt doesn't succeed
460 # We can't give this a better name without a bunch of checks, however.
461 function = FurFunctionCallExpression(
466 success, index, arguments = _parenthese_wrapped_parser(_comma_separated_expression_list_parser)(
471 return True, index, function
473 def _if_expression_parser(index, tokens):
474 failure = (False, index, None)
476 if tokens[index].match == 'if':
481 success, index, condition_expression = _or_level_expression_parser(index, tokens)
484 raise Exception('Expected condition after "if" on line {}'.format(tokens[index].line))
486 if tokens[index].match == 'do':
489 raise Exception('Expected "do" after "if" on line {}'.format(tokens[index].line))
492 success, index, if_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
493 _, index, _ = consume_newlines(index, tokens)
495 if tokens[index].match == 'else':
497 success, index, else_statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
498 _, index, _ = consume_newlines(index, tokens)
500 else_statement_list = ()
502 if tokens[index].match == 'end':
505 raise Exception('Expected "end" after "if" on line {}'.format(tokens[index].line))
511 condition_expression=condition_expression,
512 if_statement_list=if_statement_list,
513 else_statement_list=else_statement_list,
517 _expression_parser = _or_parser(
518 _or_level_expression_parser,
519 _if_expression_parser, # This should always be at the top level
522 def _expression_statement_parser(index, tokens):
523 failure = (False, index, None)
525 success, index, expression = _expression_parser(index, tokens)
530 return (True, index, FurExpressionStatement(expression=expression))
# Symbol names treated as builtins; `_assignment_statement_parser` checks
# assignment targets against this set.
BUILTINS = {
    'pow',
    'print',
}
534 def _assignment_statement_parser(index, tokens):
535 failure = (False, index, None)
537 if tokens[index].type == 'symbol':
538 target = tokens[index].match
539 target_assignment_line = tokens[index].line
546 if tokens[index].type == 'assignment_operator':
547 if target in BUILTINS:
549 'Trying to assign to builtin "{}" on line {}'.format(target, target_assignment_line),
551 assignment_operator_index = index
555 success, index, expression = _expression_parser(index + 1, tokens)
559 'Expected expression after assignment operator on line {}'.format(
560 tokens[assignment_operator_index].line
564 return True, index, FurAssignmentStatement(target=target, expression=expression)
566 def _function_definition_statement_parser(index, tokens):
567 failure = (False, index, None)
569 if tokens[index].type == 'keyword' and tokens[index].match == 'def':
574 if tokens[index].type == 'symbol':
575 name = tokens[index].match
578 raise Exception('Expected function name, found "{}" on line {}'.format(
583 if tokens[index].type == 'open_parenthese':
586 raise Exception('Expected "(", found "{}" on line {}'.format(
591 success, index, argument_name_list = _comma_separated_list_parser(_symbol_expression_parser)(
596 if tokens[index].type == 'close_parenthese':
599 raise Exception('Expected ")", found "{}" on line {}'.format(
604 if tokens[index].match == 'do':
609 success, index, statement_list = _zero_or_more_parser(tuple, _statement_parser)(index, tokens)
611 _, index, _ = consume_newlines(index, tokens)
613 if tokens[index].type == 'keyword' and tokens[index].match == 'end':
618 return True, index, FurFunctionDefinitionStatement(
620 argument_name_list=tuple(an.symbol for an in argument_name_list),
621 statement_list=statement_list,
624 def _statement_parser(index, tokens):
625 _, index, _ = consume_newlines(index, tokens)
627 if index == len(tokens):
628 return (False, index, None)
631 _assignment_statement_parser,
632 _expression_statement_parser,
633 _function_definition_statement_parser,
def _program_formatter(statement_list):
    """Wrap the given statements in a ``FurProgram`` node."""
    program = FurProgram(statement_list=statement_list)
    return program
# Top-level parser: `_zero_or_more_parser` applied to `_statement_parser`,
# with the collected results handed to `_program_formatter`.
_program_parser = _zero_or_more_parser(
    formatter=_program_formatter,
    parser=_statement_parser,
)
641 def _parse(parser, tokens):
642 success, index, result = parser(0, tokens)
644 if index < len(tokens):
645 raise Exception('Unable to parse token {}'.format(tokens[index]))
650 raise Exception('Unable to parse')
653 return _parse(_program_parser, tokens)
655 if __name__ == '__main__':
660 class FurStringLiteralExpressionParserTests(unittest.TestCase):
661 def test_parses_single_quoted_string_literal(self):
663 _string_literal_expression_parser(0, tokenization.tokenize("'Hello, world'")),
667 FurStringLiteralExpression(string='Hello, world'),
671 class FurFunctionCallExpressionParserTests(unittest.TestCase):
672 def test_parses_function_with_string_literal_argument(self):
674 _function_call_expression_parser(0, tokenization.tokenize("print('Hello, world')")),
678 FurFunctionCallExpression(
680 arguments=(FurStringLiteralExpression(string='Hello, world'),),