Add the ability to assign to and retrieve variables
authorDavid Kerkeslager <kerkeslager@gmail.com>
Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)
committerDavid Kerkeslager <kerkeslager@gmail.com>
Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)
examples/09_assignment.fur [new file with mode: 0644]
examples/09_assignment.fur.output.txt [new file with mode: 0644]
generation.py
parsing.py
templates/program.c
tokenization.py
transformation.py

diff --git a/examples/09_assignment.fur b/examples/09_assignment.fur
new file mode 100644 (file)
index 0000000..6c827b3
--- /dev/null
@@ -0,0 +1,2 @@
+greeting = 'Hello, world'
+print(greeting)
diff --git a/examples/09_assignment.fur.output.txt b/examples/09_assignment.fur.output.txt
new file mode 100644 (file)
index 0000000..dbe9dba
--- /dev/null
@@ -0,0 +1 @@
+Hello, world
\ No newline at end of file
index cefae7b..5696065 100644 (file)
@@ -1,5 +1,6 @@
 import jinja2
 
+import parsing
 import transformation
 
 ENV = jinja2.Environment(
@@ -23,6 +24,9 @@ def generate_string_literal(c_string_literal):
         ''.join(c_escape(ch for ch in c_string_literal.value)),
     )
 
+def generate_symbol_expression(c_symbol_expression):
+    return 'Environment_get(environment, Runtime_symbol(runtime, "{}"))'.format(c_symbol_expression.value)
+
 def generate_expression(c_argument):
     if isinstance(c_argument, transformation.CNegationExpression):
         return generate_negation_expression(c_argument)
@@ -33,6 +37,7 @@ def generate_expression(c_argument):
     LITERAL_TYPE_MAPPING = {
         transformation.CIntegerLiteral: generate_integer_literal,
         transformation.CStringLiteral: generate_string_literal,
+        transformation.CSymbolExpression: generate_symbol_expression,
     }
 
     if type(c_argument) in LITERAL_TYPE_MAPPING:
@@ -63,12 +68,26 @@ def generate_function_call(c_function_call):
         ', '.join(generate_expression(argument) for argument in c_function_call.arguments),
     )
 
-def generate_statement(c_function_call_statement):
-    return '{};'.format(generate_function_call(c_function_call_statement))
+def generate_expression_statement(c_function_call_statement):
+    # TODO Do we need to garbage collect the results of arbitrary statements?
+    return '{};'.format(generate_expression(c_function_call_statement))
+
+def generate_assignment_statement(c_assignment_statement):
+    return 'Environment_set(environment, Runtime_symbol(runtime, "{}"), {});'.format(
+        c_assignment_statement.target,
+        generate_expression(c_assignment_statement.expression),
+    )
+
+def generate_statement(statement):
+    if isinstance(statement, transformation.CAssignmentStatement):
+        return generate_assignment_statement(statement)
+
+    return generate_expression_statement(statement)
 
 def generate(c_program):
     template = ENV.get_template('program.c')
     return template.render(
+        MAX_SYMBOL_LENGTH=parsing.MAX_SYMBOL_LENGTH,
         builtins=list(sorted(c_program.builtins)),
         statements=[generate_statement(statement) for statement in c_program.statements],
         standard_libraries=list(sorted(c_program.standard_libraries)),
index 0d7caa6..b660013 100644 (file)
@@ -1,5 +1,8 @@
 import collections
 
+# TODO Check max symbol length in assignments, function calls, and symbol expressions
+MAX_SYMBOL_LENGTH = 16
+
 def _or_parser(*parsers):
     def result_parser(index, tokens):
         failure = (False, index, None)
@@ -44,6 +47,13 @@ FurStringLiteralExpression = collections.namedtuple(
     ],
 )
 
+FurSymbolExpression = collections.namedtuple(
+    'FurSymbolExpression',
+    [
+        'value',
+    ],
+)
+
 FurNegationExpression = collections.namedtuple(
     'FurNegationExpression',
     [
@@ -102,14 +112,16 @@ def _integer_literal_expression_parser(index, tokens):
     return True, index, FurIntegerLiteralExpression(value=value)
 
 def _string_literal_expression_parser(index, tokens):
-    failure = (False, index, None)
+    if tokens[index].type == 'single_quoted_string_literal':
+        return (True, index + 1, FurStringLiteralExpression(value=tokens[index].match[1:-1]))
 
-    if tokens[index].type != 'single_quoted_string_literal':
-        return failure
-    value = tokens[index].match[1:-1]
-    index += 1
+    return (False, index, None)
+
+def _symbol_expression_parser(index, tokens):
+    if tokens[index].type == 'symbol':
+        return (True, index + 1, FurSymbolExpression(value=tokens[index].match))
 
-    return True, index, FurStringLiteralExpression(value=value)
+    return (False, index, None)
 
 def _negation_expression_parser(index, tokens):
     failure = (False, index, None)
@@ -130,6 +142,7 @@ def _literal_level_expression_parser(index, tokens):
         _function_call_expression_parser,
         _integer_literal_expression_parser,
         _string_literal_expression_parser,
+        _symbol_expression_parser,
     )(index, tokens)
 
 def _multiplication_level_expression_parser(index, tokens):
@@ -212,6 +225,14 @@ FurFunctionCallExpression = collections.namedtuple(
     ],
 )
 
+FurAssignmentStatement = collections.namedtuple(
+    'FurAssignmentStatement',
+    [
+        'target',
+        'expression',
+    ],
+)
+
 FurProgram = collections.namedtuple(
     'FurProgram',
     [
@@ -220,6 +241,7 @@ FurProgram = collections.namedtuple(
 )
 
 def _function_call_expression_parser(index, tokens):
+    # TODO Use a FurSymbolExpression for the name
     failure = (False, index, None)
 
     if tokens[index].type != 'symbol':
@@ -245,10 +267,42 @@ def _function_call_expression_parser(index, tokens):
 
     return True, index, FurFunctionCallExpression(name=name, arguments=arguments)
 
+_expression_parser = _multiplication_level_expression_parser
+
+def _assignment_statement_parser(index, tokens):
+    # TODO Use a FurSymbolExpression for the target
+    failure = (False, index, None)
+
+    if tokens[index].type != 'symbol':
+        return failure
+    target = tokens[index].match
+    index += 1
+
+    if tokens[index].type != 'assignment_operator':
+        return failure
+    assignment_operator_index = index
+
+    success, index, expression = _expression_parser(index + 1, tokens)
+
+    if not success:
+        raise Exception(
+            'Expected expression after assignment operator on line {}'.format(
+                tokens[assignment_operator_index].line
+            )
+        )
+
+    return True, index, FurAssignmentStatement(target=target, expression=expression)
+
+def _statement_parser(index, tokens):
+    return _or_parser(
+        _assignment_statement_parser,
+        _expression_parser,
+    )(index, tokens)
+
 def _program_formatter(statement_list):
     return FurProgram(statement_list=statement_list)
 
-_program_parser = _zero_or_more_parser(_program_formatter, _function_call_expression_parser)
+_program_parser = _zero_or_more_parser(_program_formatter, _statement_parser)
 
 def _parse(parser, tokens):
     success, index, result = parser(0, tokens)
index 64e0152..c20fd45 100644 (file)
@@ -25,6 +25,15 @@ struct String
   char* characters;
 };
 
+#define MAX_SYMBOL_LENGTH {{ MAX_SYMBOL_LENGTH }}
+struct Symbol;
+typedef struct Symbol Symbol;
+struct Symbol
+{
+  size_t length;
+  char name[MAX_SYMBOL_LENGTH];
+};
+
 enum Type
 {
   INTEGER,
@@ -43,11 +52,78 @@ struct Object
   Instance instance;
 };
 
+struct EnvironmentNode;
+typedef struct EnvironmentNode EnvironmentNode;
+struct EnvironmentNode
+{
+  Symbol* key;
+  Object value;
+  EnvironmentNode* next;
+};
+
+struct Environment;
+typedef struct Environment Environment;
+struct Environment
+{
+  EnvironmentNode* root;
+};
+
+Environment* Environment_construct()
+{
+  // TODO Handle malloc returning NULL
+  Environment* result = malloc(sizeof(Environment));
+  result->root = NULL;
+  return result;
+}
+
+void Environment_destruct(Environment* self)
+{
+  EnvironmentNode* next;
+  for(EnvironmentNode* node = self->root; node != NULL; node = next)
+  {
+    // We don't need to destruct the keys, because those will be destructed at the end when the Runtime is destructed
+    // We don't need to destruct the permanent strings, because those will be destructed at the end when the Runtime is destructed
+    // The above two comments represent all heap-allocated objects currently, so we don't need to destruct Objects (yet)
+    next = node->next;
+    free(node);
+  }
+}
+
+// This need not be thread safe because environments exist on one thread only
+void Environment_set(Environment* self, Symbol* key, Object value)
+{
+  EnvironmentNode* node = malloc(sizeof(EnvironmentNode));
+  node->key = key;
+  node->value = value;
+  node->next = self->root;
+  self->root = node;
+}
+
+Object Environment_get(Environment* self, Symbol* symbol)
+{
+  for(EnvironmentNode* node = self->root; node != NULL; node = node->next)
+  {
+    // We can compare pointers because pointers are unique within Runtime->symbols
+    if(node->key == symbol)
+    {
+      return node->value;
+    }
+  }
+
+  // TODO Handle symbol errors
+  assert(false);
+}
+
+
+// TODO Allocate all symbols and strings as static constants so we can remove the level of indirection
 struct Runtime
 {
   size_t permanentStringsLength;
   size_t permanentStringsAllocated;
   String** permanentStrings;
+  size_t symbolsLength;
+  size_t symbolsAllocated;
+  Symbol** symbols;
 };
 
 Runtime* Runtime_construct()
@@ -56,12 +132,26 @@ Runtime* Runtime_construct()
   result->permanentStringsLength = 0;
   result->permanentStringsAllocated = 0;
   result->permanentStrings = NULL;
+  result->symbolsLength = 0;
+  result->symbolsAllocated =0;
+  result->symbols = NULL;
   return result;
 }
 
 void Runtime_destruct(Runtime* self)
 {
+  for(size_t i = 0; i < self->permanentStringsLength; i++)
+  {
+    free(self->permanentStrings[i]);
+  }
+
+  for(size_t i = 0; i < self->symbolsLength; i++)
+  {
+    free(self->symbols[i]);
+  }
+
   free(self->permanentStrings);
+  free(self->symbols);
   free(self);
 }
 
@@ -91,6 +181,49 @@ void Runtime_addPermanentString(Runtime* self, String* string)
   self->permanentStringsLength++;
 }
 
+// TODO Optimize this by sorting the symbols
+// TODO Make this function thread safe
+Symbol* Runtime_symbol(Runtime* self, const char* name)
+{
+  assert(strlen(name) <= MAX_SYMBOL_LENGTH);
+
+  for(size_t i = 0; i < self->symbolsLength; i++)
+  {
+    if(strcmp(self->symbols[i]->name, name) == 0)
+    {
+      return self->symbols[i];
+    }
+  }
+
+  if(self->symbolsLength == self->symbolsAllocated)
+  {
+    if(self->symbolsAllocated == 0)
+    {
+      self->symbolsAllocated = 8;
+    }
+    else
+    {
+      self->symbolsAllocated = self->symbolsAllocated * 2;
+    }
+
+    self->symbols = realloc(
+      self->symbols,
+      sizeof(Symbol*) * self->symbolsAllocated
+    );
+
+    // TODO Handle realloc returning NULL
+  }
+
+  Symbol* result = malloc(sizeof(Symbol));
+  result->length = strlen(name);
+  strcpy(result->name, name);
+
+  self->symbols[self->symbolsLength] = result;
+  self->symbolsLength++;
+
+  return result;
+}
+
 Object integerLiteral(int32_t literal)
 {
   Object result;
@@ -215,11 +348,13 @@ void builtin$print(Object output)
 int main(int argc, char** argv)
 {
   Runtime* runtime = Runtime_construct();
+  Environment* environment = Environment_construct();
 
   {% for statement in statements %}
   {{ statement }}
   {% endfor %}
 
+  Environment_destruct(environment);
   Runtime_destruct(runtime);
 
   return 0;
index f316e5e..e9f536b 100644 (file)
@@ -36,6 +36,7 @@ _TOKEN_MATCHERS = [
     ('open_parenthese',                 r'\('),
     ('close_parenthese',                r'\)'),
     ('comma',                           r','),
+    ('assignment_operator',             r'='),
     ('integer_literal',                 r'\d+'),
     ('symbol',                          r'[a-z]+'),
     ('single_quoted_string_literal',    r"'.*?'"),
@@ -185,6 +186,16 @@ if __name__ == '__main__':
                 ),),
             )
 
+        def test_tokenizes_assignment_operator(self):
+            self.assertEqual(
+                tokenize('='),
+                (Token(
+                    type='assignment_operator',
+                    match='=',
+                    index=0,
+                    line=1,
+                ),),
+            )
 
         def test_handles_trailing_newline(self):
             self.assertEqual(
index d680145..3f52e43 100644 (file)
@@ -16,6 +16,13 @@ CStringLiteral = collections.namedtuple(
     ],
 )
 
+CSymbolExpression = collections.namedtuple(
+    'CSymbolExpression',
+    [
+        'value',
+    ],
+)
+
 CNegationExpression = collections.namedtuple(
     'CNegationExpression',
     [
@@ -71,6 +78,14 @@ CFunctionCallExpression = collections.namedtuple(
     ],
 )
 
+CAssignmentStatement = collections.namedtuple(
+    'CAssignmentStatement',
+    [
+        'target',
+        'expression',
+    ],
+)
+
 CProgram = collections.namedtuple(
     'CProgram',
     [
@@ -95,6 +110,7 @@ def transform_expression(builtin_dependencies, expression):
     LITERAL_TYPE_MAPPING = {
         parsing.FurIntegerLiteralExpression: CIntegerLiteral,
         parsing.FurStringLiteralExpression: CStringLiteral,
+        parsing.FurSymbolExpression: CSymbolExpression,
     }
 
     if type(expression) in LITERAL_TYPE_MAPPING:
@@ -113,6 +129,13 @@ def transform_expression(builtin_dependencies, expression):
         right=transform_expression(builtin_dependencies, expression.right),
     )
 
+def transform_assignment_statement(builtin_dependencies, assignment_statement):
+    # TODO Check that target is not a builtin
+    return CAssignmentStatement(
+        target=assignment_statement.target,
+        expression=transform_expression(builtin_dependencies, assignment_statement.expression),
+    )
+
 def transform_negation_expression(builtin_dependencies, negation_expression):
     return CNegationExpression(value=transform_expression(builtin_dependencies, negation_expression.value))
 
@@ -127,11 +150,17 @@ def transform_function_call_expression(builtin_dependencies, function_call):
 
     raise Exception()
 
+def transform_statement(builtin_dependencies, statement):
+    return {
+        parsing.FurAssignmentStatement: transform_assignment_statement,
+        parsing.FurFunctionCallExpression: transform_function_call_expression,
+    }[type(statement)](builtin_dependencies, statement)
+
 def transform(program):
     builtins = set()
 
     c_statements = [
-        transform_function_call_expression(builtins, statement) for statement in program.statement_list
+        transform_statement(builtins, statement) for statement in program.statement_list
     ]
 
     standard_libraries = set()