Add the ability to assign to and retrieve variables

author David Kerkeslager <kerkeslager@gmail.com>

Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)

committer David Kerkeslager <kerkeslager@gmail.com>

Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)
author David Kerkeslager <kerkeslager@gmail.com>
Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)
committer David Kerkeslager <kerkeslager@gmail.com>
Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)
diff --git a/examples/09_assignment.fur b/examples/09_assignment.fur

new file mode 100644 (file)

index 0000000..6c827b3
--- /dev/null
+++ b/examples/09_assignment.fur
@@ -0,0 +1,2 @@
+greeting = 'Hello, world'
+print(greeting)
diff --git a/examples/09_assignment.fur.output.txt b/examples/09_assignment.fur.output.txt

new file mode 100644 (file)

index 0000000..dbe9dba
--- /dev/null
+++ b/examples/09_assignment.fur.output.txt
@@ -0,0 +1 @@
+Hello, world
+\ No newline at end of file
diff --git a/generation.py b/generation.py

index cefae7b..5696065 100644 (file)
--- a/generation.py
+++ b/generation.py
@@ -1,5 +1,6 @@
  import jinja2
  
+import parsing
  import transformation
  
  ENV = jinja2.Environment(
@@ -23,6 +24,9 @@ def generate_string_literal(c_string_literal):
          ''.join(c_escape(ch for ch in c_string_literal.value)),
      )
  
+def generate_symbol_expression(c_symbol_expression):
+    return 'Environment_get(environment, Runtime_symbol(runtime, "{}"))'.format(c_symbol_expression.value)
+
  def generate_expression(c_argument):
      if isinstance(c_argument, transformation.CNegationExpression):
          return generate_negation_expression(c_argument)
@@ -33,6 +37,7 @@ def generate_expression(c_argument):
      LITERAL_TYPE_MAPPING = {
          transformation.CIntegerLiteral: generate_integer_literal,
          transformation.CStringLiteral: generate_string_literal,
+        transformation.CSymbolExpression: generate_symbol_expression,
      }
  
      if type(c_argument) in LITERAL_TYPE_MAPPING:
@@ -63,12 +68,26 @@ def generate_function_call(c_function_call):
          ', '.join(generate_expression(argument) for argument in c_function_call.arguments),
      )
  
-def generate_statement(c_function_call_statement):
-    return '{};'.format(generate_function_call(c_function_call_statement))
+def generate_expression_statement(c_function_call_statement):
+    # TODO Do we need to garbage collect the results of arbitrary statements?
+    return '{};'.format(generate_expression(c_function_call_statement))
+
+def generate_assignment_statement(c_assignment_statement):
+    return 'Environment_set(environment, Runtime_symbol(runtime, "{}"), {});'.format(
+        c_assignment_statement.target,
+        generate_expression(c_assignment_statement.expression),
+    )
+
+def generate_statement(statement):
+    if isinstance(statement, transformation.CAssignmentStatement):
+        return generate_assignment_statement(statement)
+
+    return generate_expression_statement(statement)
  
  def generate(c_program):
      template = ENV.get_template('program.c')
      return template.render(
+        MAX_SYMBOL_LENGTH=parsing.MAX_SYMBOL_LENGTH,
          builtins=list(sorted(c_program.builtins)),
          statements=[generate_statement(statement) for statement in c_program.statements],
          standard_libraries=list(sorted(c_program.standard_libraries)),
diff --git a/parsing.py b/parsing.py

index 0d7caa6..b660013 100644 (file)
--- a/parsing.py
+++ b/parsing.py
@@ -1,5 +1,8 @@
  import collections
  
+# TODO Check max symbol length in assignments, function calls, and symbol expressions
+MAX_SYMBOL_LENGTH = 16
+
  def _or_parser(*parsers):
      def result_parser(index, tokens):
          failure = (False, index, None)
@@ -44,6 +47,13 @@ FurStringLiteralExpression = collections.namedtuple(
      ],
  )
  
+FurSymbolExpression = collections.namedtuple(
+    'FurSymbolExpression',
+    [
+        'value',
+    ],
+)
+
  FurNegationExpression = collections.namedtuple(
      'FurNegationExpression',
      [
@@ -102,14 +112,16 @@ def _integer_literal_expression_parser(index, tokens):
      return True, index, FurIntegerLiteralExpression(value=value)
  
  def _string_literal_expression_parser(index, tokens):
-    failure = (False, index, None)
+    if tokens[index].type == 'single_quoted_string_literal':
+        return (True, index + 1, FurStringLiteralExpression(value=tokens[index].match[1:-1]))
  
-    if tokens[index].type != 'single_quoted_string_literal':
-        return failure
-    value = tokens[index].match[1:-1]
-    index += 1
+    return (False, index, None)
+
+def _symbol_expression_parser(index, tokens):
+    if tokens[index].type == 'symbol':
+        return (True, index + 1, FurSymbolExpression(value=tokens[index].match))
  
-    return True, index, FurStringLiteralExpression(value=value)
+    return (False, index, None)
  
  def _negation_expression_parser(index, tokens):
      failure = (False, index, None)
@@ -130,6 +142,7 @@ def _literal_level_expression_parser(index, tokens):
          _function_call_expression_parser,
          _integer_literal_expression_parser,
          _string_literal_expression_parser,
+        _symbol_expression_parser,
      )(index, tokens)
  
  def _multiplication_level_expression_parser(index, tokens):
@@ -212,6 +225,14 @@ FurFunctionCallExpression = collections.namedtuple(
      ],
  )
  
+FurAssignmentStatement = collections.namedtuple(
+    'FurAssignmentStatement',
+    [
+        'target',
+        'expression',
+    ],
+)
+
  FurProgram = collections.namedtuple(
      'FurProgram',
      [
@@ -220,6 +241,7 @@ FurProgram = collections.namedtuple(
  )
  
  def _function_call_expression_parser(index, tokens):
+    # TODO Use a FurSymbolExpression for the name
      failure = (False, index, None)
  
      if tokens[index].type != 'symbol':
@@ -245,10 +267,42 @@ def _function_call_expression_parser(index, tokens):
  
      return True, index, FurFunctionCallExpression(name=name, arguments=arguments)
  
+_expression_parser = _multiplication_level_expression_parser
+
+def _assignment_statement_parser(index, tokens):
+    # TODO Use a FurSymbolExpression for the target
+    failure = (False, index, None)
+
+    if tokens[index].type != 'symbol':
+        return failure
+    target = tokens[index].match
+    index += 1
+
+    if tokens[index].type != 'assignment_operator':
+        return failure
+    assignment_operator_index = index
+
+    success, index, expression = _expression_parser(index + 1, tokens)
+
+    if not success:
+        raise Exception(
+            'Expected expression after assignment operator on line {}'.format(
+                tokens[assignment_operator_index].line
+            )
+        )
+
+    return True, index, FurAssignmentStatement(target=target, expression=expression)
+
+def _statement_parser(index, tokens):
+    return _or_parser(
+        _assignment_statement_parser,
+        _expression_parser,
+    )(index, tokens)
+
  def _program_formatter(statement_list):
      return FurProgram(statement_list=statement_list)
  
-_program_parser = _zero_or_more_parser(_program_formatter, _function_call_expression_parser)
+_program_parser = _zero_or_more_parser(_program_formatter, _statement_parser)
  
  def _parse(parser, tokens):
      success, index, result = parser(0, tokens)
diff --git a/templates/program.c b/templates/program.c

index 64e0152..c20fd45 100644 (file)
--- a/templates/program.c
+++ b/templates/program.c
@@ -25,6 +25,15 @@ struct String
    char* characters;
  };
  
+#define MAX_SYMBOL_LENGTH {{ MAX_SYMBOL_LENGTH }}
+struct Symbol;
+typedef struct Symbol Symbol;
+struct Symbol
+{
+  size_t length;
+  char name[MAX_SYMBOL_LENGTH];
+};
+
  enum Type
  {
    INTEGER,
@@ -43,11 +52,78 @@ struct Object
    Instance instance;
  };
  
+struct EnvironmentNode;
+typedef struct EnvironmentNode EnvironmentNode;
+struct EnvironmentNode
+{
+  Symbol* key;
+  Object value;
+  EnvironmentNode* next;
+};
+
+struct Environment;
+typedef struct Environment Environment;
+struct Environment
+{
+  EnvironmentNode* root;
+};
+
+Environment* Environment_construct()
+{
+  // TODO Handle malloc returning NULL
+  Environment* result = malloc(sizeof(Environment));
+  result->root = NULL;
+  return result;
+}
+
+void Environment_destruct(Environment* self)
+{
+  EnvironmentNode* next;
+  for(EnvironmentNode* node = self->root; node != NULL; node = next)
+  {
+    // We don't need to destruct the keys, because those will be destructed at the end when the Runtime is destructed
+    // We don't need to destruct the permanent strings, because those will be destructed at the end when the Runtime is destructed
+    // The above two comments represent all heap-allocated objects currently, so we don't need to destruct Objects (yet)
+    next = node->next;
+    free(node);
+  }
+}
+
+// This need not be thread safe because environments exist on one thread only
+void Environment_set(Environment* self, Symbol* key, Object value)
+{
+  EnvironmentNode* node = malloc(sizeof(EnvironmentNode));
+  node->key = key;
+  node->value = value;
+  node->next = self->root;
+  self->root = node;
+}
+
+Object Environment_get(Environment* self, Symbol* symbol)
+{
+  for(EnvironmentNode* node = self->root; node != NULL; node = node->next)
+  {
+    // We can compare pointers because pointers are unique within Runtime->symbols
+    if(node->key == symbol)
+    {
+      return node->value;
+    }
+  }
+
+  // TODO Handle symbol errors
+  assert(false);
+}
+
+
+// TODO Allocate all symbols and strings as static constants so we can remove the level of indirection
  struct Runtime
  {
    size_t permanentStringsLength;
    size_t permanentStringsAllocated;
    String** permanentStrings;
+  size_t symbolsLength;
+  size_t symbolsAllocated;
+  Symbol** symbols;
  };
  
  Runtime* Runtime_construct()
@@ -56,12 +132,26 @@ Runtime* Runtime_construct()
    result->permanentStringsLength = 0;
    result->permanentStringsAllocated = 0;
    result->permanentStrings = NULL;
+  result->symbolsLength = 0;
+  result->symbolsAllocated =0;
+  result->symbols = NULL;
    return result;
  }
  
  void Runtime_destruct(Runtime* self)
  {
+  for(size_t i = 0; i < self->permanentStringsLength; i++)
+  {
+    free(self->permanentStrings[i]);
+  }
+
+  for(size_t i = 0; i < self->symbolsLength; i++)
+  {
+    free(self->symbols[i]);
+  }
+
    free(self->permanentStrings);
+  free(self->symbols);
    free(self);
  }
  
@@ -91,6 +181,49 @@ void Runtime_addPermanentString(Runtime* self, String* string)
    self->permanentStringsLength++;
  }
  
+// TODO Optimize this by sorting the symbols
+// TODO Make this function thread safe
+Symbol* Runtime_symbol(Runtime* self, const char* name)
+{
+  assert(strlen(name) <= MAX_SYMBOL_LENGTH);
+
+  for(size_t i = 0; i < self->symbolsLength; i++)
+  {
+    if(strcmp(self->symbols[i]->name, name) == 0)
+    {
+      return self->symbols[i];
+    }
+  }
+
+  if(self->symbolsLength == self->symbolsAllocated)
+  {
+    if(self->symbolsAllocated == 0)
+    {
+      self->symbolsAllocated = 8;
+    }
+    else
+    {
+      self->symbolsAllocated = self->symbolsAllocated * 2;
+    }
+
+    self->symbols = realloc(
+      self->symbols,
+      sizeof(Symbol*) * self->symbolsAllocated
+    );
+
+    // TODO Handle realloc returning NULL
+  }
+
+  Symbol* result = malloc(sizeof(Symbol));
+  result->length = strlen(name);
+  strcpy(result->name, name);
+
+  self->symbols[self->symbolsLength] = result;
+  self->symbolsLength++;
+
+  return result;
+}
+
  Object integerLiteral(int32_t literal)
  {
    Object result;
@@ -215,11 +348,13 @@ void builtin$print(Object output)
  int main(int argc, char** argv)
  {
    Runtime* runtime = Runtime_construct();
+  Environment* environment = Environment_construct();
  
    {% for statement in statements %}
    {{ statement }}
    {% endfor %}
  
+  Environment_destruct(environment);
    Runtime_destruct(runtime);
  
    return 0;
diff --git a/tokenization.py b/tokenization.py

index f316e5e..e9f536b 100644 (file)
--- a/tokenization.py
+++ b/tokenization.py
@@ -36,6 +36,7 @@ _TOKEN_MATCHERS = [
      ('open_parenthese',                 r'\('),
      ('close_parenthese',                r'\)'),
      ('comma',                           r','),
+    ('assignment_operator',             r'='),
      ('integer_literal',                 r'\d+'),
      ('symbol',                          r'[a-z]+'),
      ('single_quoted_string_literal',    r"'.*?'"),
@@ -185,6 +186,16 @@ if __name__ == '__main__':
                  ),),
              )
  
+        def test_tokenizes_assignment_operator(self):
+            self.assertEqual(
+                tokenize('='),
+                (Token(
+                    type='assignment_operator',
+                    match='=',
+                    index=0,
+                    line=1,
+                ),),
+            )
  
          def test_handles_trailing_newline(self):
              self.assertEqual(
diff --git a/transformation.py b/transformation.py

index d680145..3f52e43 100644 (file)
--- a/transformation.py
+++ b/transformation.py
@@ -16,6 +16,13 @@ CStringLiteral = collections.namedtuple(
      ],
  )
  
+CSymbolExpression = collections.namedtuple(
+    'CSymbolExpression',
+    [
+        'value',
+    ],
+)
+
  CNegationExpression = collections.namedtuple(
      'CNegationExpression',
      [
@@ -71,6 +78,14 @@ CFunctionCallExpression = collections.namedtuple(
      ],
  )
  
+CAssignmentStatement = collections.namedtuple(
+    'CAssignmentStatement',
+    [
+        'target',
+        'expression',
+    ],
+)
+
  CProgram = collections.namedtuple(
      'CProgram',
      [
@@ -95,6 +110,7 @@ def transform_expression(builtin_dependencies, expression):
      LITERAL_TYPE_MAPPING = {
          parsing.FurIntegerLiteralExpression: CIntegerLiteral,
          parsing.FurStringLiteralExpression: CStringLiteral,
+        parsing.FurSymbolExpression: CSymbolExpression,
      }
  
      if type(expression) in LITERAL_TYPE_MAPPING:
@@ -113,6 +129,13 @@ def transform_expression(builtin_dependencies, expression):
          right=transform_expression(builtin_dependencies, expression.right),
      )
  
+def transform_assignment_statement(builtin_dependencies, assignment_statement):
+    # TODO Check that target is not a builtin
+    return CAssignmentStatement(
+        target=assignment_statement.target,
+        expression=transform_expression(builtin_dependencies, assignment_statement.expression),
+    )
+
  def transform_negation_expression(builtin_dependencies, negation_expression):
      return CNegationExpression(value=transform_expression(builtin_dependencies, negation_expression.value))
  
@@ -127,11 +150,17 @@ def transform_function_call_expression(builtin_dependencies, function_call):
  
      raise Exception()
  
+def transform_statement(builtin_dependencies, statement):
+    return {
+        parsing.FurAssignmentStatement: transform_assignment_statement,
+        parsing.FurFunctionCallExpression: transform_function_call_expression,
+    }[type(statement)](builtin_dependencies, statement)
+
  def transform(program):
      builtins = set()
  
      c_statements = [
-        transform_function_call_expression(builtins, statement) for statement in program.statement_list
+        transform_statement(builtins, statement) for statement in program.statement_list
      ]
  
      standard_libraries = set()
author	David Kerkeslager <kerkeslager@gmail.com>
	Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)
committer	David Kerkeslager <kerkeslager@gmail.com>
	Sat, 5 Aug 2017 22:56:57 +0000 (18:56 -0400)
examples/09_assignment.fur	[new file with mode: 0644]	patch \| blob
examples/09_assignment.fur.output.txt	[new file with mode: 0644]	patch \| blob
generation.py		patch \| blob \| history
parsing.py		patch \| blob \| history
templates/program.c		patch \| blob \| history
tokenization.py		patch \| blob \| history
transformation.py		patch \| blob \| history