From ec54804ff2c217e4f6be0220049142e300681093 Mon Sep 17 00:00:00 2001 From: David Kerkeslager Date: Sat, 5 Aug 2017 18:56:57 -0400 Subject: [PATCH] Add the ability to assign to and retrieve variables --- examples/09_assignment.fur | 2 + examples/09_assignment.fur.output.txt | 1 + generation.py | 23 ++++- parsing.py | 68 +++++++++++-- templates/program.c | 135 ++++++++++++++++++++++++++ tokenization.py | 11 +++ transformation.py | 31 +++++- 7 files changed, 261 insertions(+), 10 deletions(-) create mode 100644 examples/09_assignment.fur create mode 100644 examples/09_assignment.fur.output.txt diff --git a/examples/09_assignment.fur b/examples/09_assignment.fur new file mode 100644 index 0000000..6c827b3 --- /dev/null +++ b/examples/09_assignment.fur @@ -0,0 +1,2 @@ +greeting = 'Hello, world' +print(greeting) diff --git a/examples/09_assignment.fur.output.txt b/examples/09_assignment.fur.output.txt new file mode 100644 index 0000000..dbe9dba --- /dev/null +++ b/examples/09_assignment.fur.output.txt @@ -0,0 +1 @@ +Hello, world \ No newline at end of file diff --git a/generation.py b/generation.py index cefae7b..5696065 100644 --- a/generation.py +++ b/generation.py @@ -1,5 +1,6 @@ import jinja2 +import parsing import transformation ENV = jinja2.Environment( @@ -23,6 +24,9 @@ def generate_string_literal(c_string_literal): ''.join(c_escape(ch for ch in c_string_literal.value)), ) +def generate_symbol_expression(c_symbol_expression): + return 'Environment_get(environment, Runtime_symbol(runtime, "{}"))'.format(c_symbol_expression.value) + def generate_expression(c_argument): if isinstance(c_argument, transformation.CNegationExpression): return generate_negation_expression(c_argument) @@ -33,6 +37,7 @@ def generate_expression(c_argument): LITERAL_TYPE_MAPPING = { transformation.CIntegerLiteral: generate_integer_literal, transformation.CStringLiteral: generate_string_literal, + transformation.CSymbolExpression: generate_symbol_expression, } if type(c_argument) in LITERAL_TYPE_MAPPING: @@ -63,12 +68,26 @@ def generate_function_call(c_function_call): ', '.join(generate_expression(argument) for argument in c_function_call.arguments), ) -def generate_statement(c_function_call_statement): - return '{};'.format(generate_function_call(c_function_call_statement)) +def generate_expression_statement(c_function_call_statement): + # TODO Do we need to garbage collect the results of arbitrary statements? + return '{};'.format(generate_expression(c_function_call_statement)) + +def generate_assignment_statement(c_assignment_statement): + return 'Environment_set(environment, Runtime_symbol(runtime, "{}"), {});'.format( + c_assignment_statement.target, + generate_expression(c_assignment_statement.expression), + ) + +def generate_statement(statement): + if isinstance(statement, transformation.CAssignmentStatement): + return generate_assignment_statement(statement) + + return generate_expression_statement(statement) def generate(c_program): template = ENV.get_template('program.c') return template.render( + MAX_SYMBOL_LENGTH=parsing.MAX_SYMBOL_LENGTH, builtins=list(sorted(c_program.builtins)), statements=[generate_statement(statement) for statement in c_program.statements], standard_libraries=list(sorted(c_program.standard_libraries)), diff --git a/parsing.py b/parsing.py index 0d7caa6..b660013 100644 --- a/parsing.py +++ b/parsing.py @@ -1,5 +1,8 @@ import collections +# TODO Check max symbol length in assignments, function calls, and symbol expressions +MAX_SYMBOL_LENGTH = 16 + def _or_parser(*parsers): def result_parser(index, tokens): failure = (False, index, None) @@ -44,6 +47,13 @@ FurStringLiteralExpression = collections.namedtuple( ], ) +FurSymbolExpression = collections.namedtuple( + 'FurSymbolExpression', + [ + 'value', + ], +) + FurNegationExpression = collections.namedtuple( 'FurNegationExpression', [ @@ -102,14 +112,16 @@ def _integer_literal_expression_parser(index, tokens): return True, index, FurIntegerLiteralExpression(value=value) def _string_literal_expression_parser(index, tokens): - failure = (False, index, None) + if tokens[index].type == 'single_quoted_string_literal': + return (True, index + 1, FurStringLiteralExpression(value=tokens[index].match[1:-1])) - if tokens[index].type != 'single_quoted_string_literal': - return failure - value = tokens[index].match[1:-1] - index += 1 + return (False, index, None) + +def _symbol_expression_parser(index, tokens): + if tokens[index].type == 'symbol': + return (True, index + 1, FurSymbolExpression(value=tokens[index].match)) - return True, index, FurStringLiteralExpression(value=value) + return (False, index, None) def _negation_expression_parser(index, tokens): failure = (False, index, None) @@ -130,6 +142,7 @@ def _literal_level_expression_parser(index, tokens): _function_call_expression_parser, _integer_literal_expression_parser, _string_literal_expression_parser, + _symbol_expression_parser, )(index, tokens) def _multiplication_level_expression_parser(index, tokens): @@ -212,6 +225,14 @@ FurFunctionCallExpression = collections.namedtuple( ], ) +FurAssignmentStatement = collections.namedtuple( + 'FurAssignmentStatement', + [ + 'target', + 'expression', + ], +) + FurProgram = collections.namedtuple( 'FurProgram', [ @@ -220,6 +241,7 @@ FurProgram = collections.namedtuple( ) def _function_call_expression_parser(index, tokens): + # TODO Use a FurSymbolExpression for the name failure = (False, index, None) if tokens[index].type != 'symbol': @@ -245,10 +267,42 @@ def _function_call_expression_parser(index, tokens): return True, index, FurFunctionCallExpression(name=name, arguments=arguments) +_expression_parser = _multiplication_level_expression_parser + +def _assignment_statement_parser(index, tokens): + # TODO Use a FurSymbolExpression for the target + failure = (False, index, None) + + if tokens[index].type != 'symbol': + return failure + target = tokens[index].match + index += 1 + + if tokens[index].type != 'assignment_operator': + return failure + assignment_operator_index = index + + success, index, expression = _expression_parser(index + 1, tokens) + + if not success: + raise Exception( + 'Expected expression after assignment operator on line {}'.format( + tokens[assignment_operator_index].line + ) + ) + + return True, index, FurAssignmentStatement(target=target, expression=expression) + +def _statement_parser(index, tokens): + return _or_parser( + _assignment_statement_parser, + _expression_parser, + )(index, tokens) + def _program_formatter(statement_list): return FurProgram(statement_list=statement_list) -_program_parser = _zero_or_more_parser(_program_formatter, _function_call_expression_parser) +_program_parser = _zero_or_more_parser(_program_formatter, _statement_parser) def _parse(parser, tokens): success, index, result = parser(0, tokens) diff --git a/templates/program.c b/templates/program.c index 64e0152..c20fd45 100644 --- a/templates/program.c +++ b/templates/program.c @@ -25,6 +25,15 @@ struct String char* characters; }; +#define MAX_SYMBOL_LENGTH {{ MAX_SYMBOL_LENGTH }} +struct Symbol; +typedef struct Symbol Symbol; +struct Symbol +{ + size_t length; + char name[MAX_SYMBOL_LENGTH]; +}; + enum Type { INTEGER, @@ -43,11 +52,78 @@ struct Object Instance instance; }; +struct EnvironmentNode; +typedef struct EnvironmentNode EnvironmentNode; +struct EnvironmentNode +{ + Symbol* key; + Object value; + EnvironmentNode* next; +}; + +struct Environment; +typedef struct Environment Environment; +struct Environment +{ + EnvironmentNode* root; +}; + +Environment* Environment_construct() +{ + // TODO Handle malloc returning NULL + Environment* result = malloc(sizeof(Environment)); + result->root = NULL; + return result; +} + +void Environment_destruct(Environment* self) +{ + EnvironmentNode* next; + for(EnvironmentNode* node = self->root; node != NULL; node = next) + { + // We don't need to destruct the keys, because those will be destructed at the end when the Runtime is destructed + // We don't need to destruct the permanent strings, because those will be destructed at the end when the Runtime is destructed + // The above two comments represent all heap-allocated objects currently, so we don't need to destruct Objects (yet) + next = node->next; + free(node); + } +} + +// This need not be thread safe because environments exist on one thread only +void Environment_set(Environment* self, Symbol* key, Object value) +{ + EnvironmentNode* node = malloc(sizeof(EnvironmentNode)); + node->key = key; + node->value = value; + node->next = self->root; + self->root = node; +} + +Object Environment_get(Environment* self, Symbol* symbol) +{ + for(EnvironmentNode* node = self->root; node != NULL; node = node->next) + { + // We can compare pointers because pointers are unique within Runtime->symbols + if(node->key == symbol) + { + return node->value; + } + } + + // TODO Handle symbol errors + assert(false); +} + + +// TODO Allocate all symbols and strings as static constants so we can remove the level of indirection struct Runtime { size_t permanentStringsLength; size_t permanentStringsAllocated; String** permanentStrings; + size_t symbolsLength; + size_t symbolsAllocated; + Symbol** symbols; }; Runtime* Runtime_construct() @@ -56,12 +132,26 @@ Runtime* Runtime_construct() result->permanentStringsLength = 0; result->permanentStringsAllocated = 0; result->permanentStrings = NULL; + result->symbolsLength = 0; + result->symbolsAllocated =0; + result->symbols = NULL; return result; } void Runtime_destruct(Runtime* self) { + for(size_t i = 0; i < self->permanentStringsLength; i++) + { + free(self->permanentStrings[i]); + } + + for(size_t i = 0; i < self->symbolsLength; i++) + { + free(self->symbols[i]); + } + free(self->permanentStrings); + free(self->symbols); free(self); } @@ -91,6 +181,49 @@ void Runtime_addPermanentString(Runtime* self, String* string) self->permanentStringsLength++; } +// TODO Optimize this by sorting the symbols +// TODO Make this function thread safe +Symbol* Runtime_symbol(Runtime* self, const char* name) +{ + assert(strlen(name) <= MAX_SYMBOL_LENGTH); + + for(size_t i = 0; i < self->symbolsLength; i++) + { + if(strcmp(self->symbols[i]->name, name) == 0) + { + return self->symbols[i]; + } + } + + if(self->symbolsLength == self->symbolsAllocated) + { + if(self->symbolsAllocated == 0) + { + self->symbolsAllocated = 8; + } + else + { + self->symbolsAllocated = self->symbolsAllocated * 2; + } + + self->symbols = realloc( + self->symbols, + sizeof(Symbol*) * self->symbolsAllocated + ); + + // TODO Handle realloc returning NULL + } + + Symbol* result = malloc(sizeof(Symbol)); + result->length = strlen(name); + strcpy(result->name, name); + + self->symbols[self->symbolsLength] = result; + self->symbolsLength++; + + return result; +} + Object integerLiteral(int32_t literal) { Object result; @@ -215,11 +348,13 @@ void builtin$print(Object output) int main(int argc, char** argv) { Runtime* runtime = Runtime_construct(); + Environment* environment = Environment_construct(); {% for statement in statements %} {{ statement }} {% endfor %} + Environment_destruct(environment); Runtime_destruct(runtime); return 0; diff --git a/tokenization.py b/tokenization.py index f316e5e..e9f536b 100644 --- a/tokenization.py +++ b/tokenization.py @@ -36,6 +36,7 @@ _TOKEN_MATCHERS = [ ('open_parenthese', r'\('), ('close_parenthese', r'\)'), ('comma', r','), + ('assignment_operator', r'='), ('integer_literal', r'\d+'), ('symbol', r'[a-z]+'), ('single_quoted_string_literal', r"'.*?'"), @@ -185,6 +186,16 @@ if __name__ == '__main__': ),), ) + def test_tokenizes_assignment_operator(self): + self.assertEqual( + tokenize('='), + (Token( + type='assignment_operator', + match='=', + index=0, + line=1, + ),), + ) def test_handles_trailing_newline(self): self.assertEqual( diff --git a/transformation.py b/transformation.py index d680145..3f52e43 100644 --- a/transformation.py +++ b/transformation.py @@ -16,6 +16,13 @@ CStringLiteral = collections.namedtuple( ], ) +CSymbolExpression = collections.namedtuple( + 'CSymbolExpression', + [ + 'value', + ], +) + CNegationExpression = collections.namedtuple( 'CNegationExpression', [ @@ -71,6 +78,14 @@ CFunctionCallExpression = collections.namedtuple( ], ) +CAssignmentStatement = collections.namedtuple( + 'CAssignmentStatement', + [ + 'target', + 'expression', + ], +) + CProgram = collections.namedtuple( 'CProgram', [ @@ -95,6 +110,7 @@ def transform_expression(builtin_dependencies, expression): LITERAL_TYPE_MAPPING = { parsing.FurIntegerLiteralExpression: CIntegerLiteral, parsing.FurStringLiteralExpression: CStringLiteral, + parsing.FurSymbolExpression: CSymbolExpression, } if type(expression) in LITERAL_TYPE_MAPPING: @@ -113,6 +129,13 @@ def transform_expression(builtin_dependencies, expression): right=transform_expression(builtin_dependencies, expression.right), ) +def transform_assignment_statement(builtin_dependencies, assignment_statement): + # TODO Check that target is not a builtin + return CAssignmentStatement( + target=assignment_statement.target, + expression=transform_expression(builtin_dependencies, assignment_statement.expression), + ) + def transform_negation_expression(builtin_dependencies, negation_expression): return CNegationExpression(value=transform_expression(builtin_dependencies, negation_expression.value)) @@ -127,11 +150,17 @@ def transform_function_call_expression(builtin_dependencies, function_call): raise Exception() +def transform_statement(builtin_dependencies, statement): + return { + parsing.FurAssignmentStatement: transform_assignment_statement, + parsing.FurFunctionCallExpression: transform_function_call_expression, + }[type(statement)](builtin_dependencies, statement) + def transform(program): builtins = set() c_statements = [ - transform_function_call_expression(builtins, statement) for statement in program.statement_list + transform_statement(builtins, statement) for statement in program.statement_list ] standard_libraries = set() -- 2.20.1