Add constant symbol list, which solves all the symbol allocation problems
authorDavid Kerkeslager <kerkeslager@gmail.com>
Sun, 6 Aug 2017 08:02:26 +0000 (04:02 -0400)
committerDavid Kerkeslager <kerkeslager@gmail.com>
Sun, 6 Aug 2017 08:02:26 +0000 (04:02 -0400)
generation.py
parsing.py
templates/program.c
transformation.py

index 5696065..4dfd115 100644 (file)
@@ -1,6 +1,5 @@
 import jinja2
 
-import parsing
 import transformation
 
 ENV = jinja2.Environment(
@@ -25,7 +24,10 @@ def generate_string_literal(c_string_literal):
     )
 
 def generate_symbol_expression(c_symbol_expression):
-    return 'Environment_get(environment, Runtime_symbol(runtime, "{}"))'.format(c_symbol_expression.value)
+    return 'Environment_get(environment, SYMBOL_LIST[{}] /* symbol: {} */)'.format(
+        c_symbol_expression.symbol_list_index,
+        c_symbol_expression.symbol,
+    )
 
 def generate_expression(c_argument):
     if isinstance(c_argument, transformation.CNegationExpression):
@@ -73,7 +75,8 @@ def generate_expression_statement(c_function_call_statement):
     return '{};'.format(generate_expression(c_function_call_statement))
 
 def generate_assignment_statement(c_assignment_statement):
-    return 'Environment_set(environment, Runtime_symbol(runtime, "{}"), {});'.format(
+    return 'Environment_set(environment, SYMBOL_LIST[{}] /* symbol: {} */, {});'.format(
+        c_assignment_statement.target_symbol_list_index,
         c_assignment_statement.target,
         generate_expression(c_assignment_statement.expression),
     )
@@ -87,10 +90,10 @@ def generate_statement(statement):
 def generate(c_program):
     template = ENV.get_template('program.c')
     return template.render(
-        MAX_SYMBOL_LENGTH=parsing.MAX_SYMBOL_LENGTH,
         builtins=list(sorted(c_program.builtins)),
         statements=[generate_statement(statement) for statement in c_program.statements],
         standard_libraries=list(sorted(c_program.standard_libraries)),
+        symbol_list=c_program.symbol_list,
     )
 
 if __name__ == '__main__':
index 84e08de..290eef9 100644 (file)
@@ -1,8 +1,5 @@
 import collections
 
-# TODO Check max symbol length in assignments, function calls, and symbol expressions
-MAX_SYMBOL_LENGTH = 16
-
 def _or_parser(*parsers):
     def result_parser(index, tokens):
         failure = (False, index, None)
index c20fd45..0f79b14 100644 (file)
@@ -25,13 +25,10 @@ struct String
   char* characters;
 };
 
-#define MAX_SYMBOL_LENGTH {{ MAX_SYMBOL_LENGTH }}
-struct Symbol;
-typedef struct Symbol Symbol;
-struct Symbol
-{
-  size_t length;
-  char name[MAX_SYMBOL_LENGTH];
+const char * const SYMBOL_LIST[] = {
+{% for symbol in symbol_list %}
+  "{{ symbol }}",
+{% endfor %}
 };
 
 enum Type
@@ -56,7 +53,7 @@ struct EnvironmentNode;
 typedef struct EnvironmentNode EnvironmentNode;
 struct EnvironmentNode
 {
-  Symbol* key;
+  const char* key;
   Object value;
   EnvironmentNode* next;
 };
@@ -81,16 +78,15 @@ void Environment_destruct(Environment* self)
   EnvironmentNode* next;
   for(EnvironmentNode* node = self->root; node != NULL; node = next)
   {
-    // We don't need to destruct the keys, because those will be destructed at the end when the Runtime is destructed
     // We don't need to destruct the permanent strings, because those will be destructed at the end when the Runtime is destructed
-    // The above two comments represent all heap-allocated objects currently, so we don't need to destruct Objects (yet)
+    // The above comment represents all heap-allocated objects currently, so we don't need to destruct Objects (yet)
     next = node->next;
     free(node);
   }
 }
 
 // This need not be thread safe because environments exist on one thread only
-void Environment_set(Environment* self, Symbol* key, Object value)
+void Environment_set(Environment* self, const char* const key, Object value)
 {
   EnvironmentNode* node = malloc(sizeof(EnvironmentNode));
   node->key = key;
@@ -99,11 +95,11 @@ void Environment_set(Environment* self, Symbol* key, Object value)
   self->root = node;
 }
 
-Object Environment_get(Environment* self, Symbol* symbol)
+Object Environment_get(Environment* self, const char* const symbol)
 {
   for(EnvironmentNode* node = self->root; node != NULL; node = node->next)
   {
-    // We can compare pointers because pointers are unique within Runtime->symbols
+    // We can compare pointers because pointers are unique in the SYMBOLS_LIST
     if(node->key == symbol)
     {
       return node->value;
@@ -115,15 +111,11 @@ Object Environment_get(Environment* self, Symbol* symbol)
 }
 
 
-// TODO Allocate all symbols and strings as static constants so we can remove the level of indirection
 struct Runtime
 {
   size_t permanentStringsLength;
   size_t permanentStringsAllocated;
   String** permanentStrings;
-  size_t symbolsLength;
-  size_t symbolsAllocated;
-  Symbol** symbols;
 };
 
 Runtime* Runtime_construct()
@@ -132,9 +124,6 @@ Runtime* Runtime_construct()
   result->permanentStringsLength = 0;
   result->permanentStringsAllocated = 0;
   result->permanentStrings = NULL;
-  result->symbolsLength = 0;
-  result->symbolsAllocated =0;
-  result->symbols = NULL;
   return result;
 }
 
@@ -145,13 +134,7 @@ void Runtime_destruct(Runtime* self)
     free(self->permanentStrings[i]);
   }
 
-  for(size_t i = 0; i < self->symbolsLength; i++)
-  {
-    free(self->symbols[i]);
-  }
-
   free(self->permanentStrings);
-  free(self->symbols);
   free(self);
 }
 
@@ -181,49 +164,6 @@ void Runtime_addPermanentString(Runtime* self, String* string)
   self->permanentStringsLength++;
 }
 
-// TODO Optimize this by sorting the symbols
-// TODO Make this function thread safe
-Symbol* Runtime_symbol(Runtime* self, const char* name)
-{
-  assert(strlen(name) <= MAX_SYMBOL_LENGTH);
-
-  for(size_t i = 0; i < self->symbolsLength; i++)
-  {
-    if(strcmp(self->symbols[i]->name, name) == 0)
-    {
-      return self->symbols[i];
-    }
-  }
-
-  if(self->symbolsLength == self->symbolsAllocated)
-  {
-    if(self->symbolsAllocated == 0)
-    {
-      self->symbolsAllocated = 8;
-    }
-    else
-    {
-      self->symbolsAllocated = self->symbolsAllocated * 2;
-    }
-
-    self->symbols = realloc(
-      self->symbols,
-      sizeof(Symbol*) * self->symbolsAllocated
-    );
-
-    // TODO Handle realloc returning NULL
-  }
-
-  Symbol* result = malloc(sizeof(Symbol));
-  result->length = strlen(name);
-  strcpy(result->name, name);
-
-  self->symbols[self->symbolsLength] = result;
-  self->symbolsLength++;
-
-  return result;
-}
-
 Object integerLiteral(int32_t literal)
 {
   Object result;
index 0260696..e3658bc 100644 (file)
@@ -19,7 +19,8 @@ CStringLiteral = collections.namedtuple(
 CSymbolExpression = collections.namedtuple(
     'CSymbolExpression',
     [
-        'value',
+        'symbol',
+        'symbol_list_index',
     ],
 )
 
@@ -82,6 +83,7 @@ CAssignmentStatement = collections.namedtuple(
     'CAssignmentStatement',
     [
         'target',
+        'target_symbol_list_index',
         'expression',
     ],
 )
@@ -92,6 +94,7 @@ CProgram = collections.namedtuple(
         'builtins',
         'statements',
         'standard_libraries',
+        'symbol_list',
     ],
 )
 
@@ -100,17 +103,25 @@ BUILTINS = {
     'print':    ['stdio.h'],
 }
 
-def transform_expression(builtin_dependencies, expression):
+def transform_expression(builtin_dependencies, symbol_list, expression):
     if isinstance(expression, parsing.FurNegationExpression):
-        return transform_negation_expression(builtin_dependencies, expression)
+        return transform_negation_expression(builtin_dependencies, symbol_list, expression)
 
     if isinstance(expression, parsing.FurFunctionCallExpression):
-        return transform_function_call_expression(builtin_dependencies, expression)
+        return transform_function_call_expression(builtin_dependencies, symbol_list, expression)
+
+    if isinstance(expression, parsing.FurSymbolExpression):
+        if expression.value not in symbol_list:
+            symbol_list.append(expression.value)
+
+        return CSymbolExpression(
+            symbol=expression.value,
+            symbol_list_index=symbol_list.index(expression.value),
+        )
 
     LITERAL_TYPE_MAPPING = {
         parsing.FurIntegerLiteralExpression: CIntegerLiteral,
         parsing.FurStringLiteralExpression: CStringLiteral,
-        parsing.FurSymbolExpression: CSymbolExpression,
     }
 
     if type(expression) in LITERAL_TYPE_MAPPING:
@@ -125,42 +136,56 @@ def transform_expression(builtin_dependencies, expression):
     }
 
     return INFIX_TYPE_MAPPING[type(expression)](
-        left=transform_expression(builtin_dependencies, expression.left),
-        right=transform_expression(builtin_dependencies, expression.right),
+        left=transform_expression(builtin_dependencies, symbol_list, expression.left),
+        right=transform_expression(builtin_dependencies, symbol_list, expression.right),
     )
 
-def transform_assignment_statement(builtin_dependencies, assignment_statement):
+def transform_assignment_statement(builtin_dependencies, symbol_list, assignment_statement):
     # TODO Check that target is not a builtin
+    if assignment_statement.target not in symbol_list:
+        symbol_list.append(assignment_statement.target)
+
     return CAssignmentStatement(
         target=assignment_statement.target,
-        expression=transform_expression(builtin_dependencies, assignment_statement.expression),
+        target_symbol_list_index=symbol_list.index(assignment_statement.target),
+        expression=transform_expression(
+            builtin_dependencies,
+            symbol_list,
+            assignment_statement.expression,
+        ),
     )
 
-def transform_negation_expression(builtin_dependencies, negation_expression):
-    return CNegationExpression(value=transform_expression(builtin_dependencies, negation_expression.value))
+def transform_negation_expression(builtin_dependencies, symbol_list, negation_expression):
+    return CNegationExpression(
+        value=transform_expression(builtin_dependencies, symbol_list, negation_expression.value),
+    )
 
-def transform_function_call_expression(builtin_dependencies, function_call):
+def transform_function_call_expression(builtin_dependencies, symbol_list, function_call):
     if function_call.function.value in BUILTINS.keys():
         builtin_dependencies.add(function_call.function.value)
 
         return CFunctionCallExpression(
             name='builtin$' + function_call.function.value,
-            arguments=tuple(transform_expression(builtin_dependencies, arg) for arg in function_call.arguments),
+            arguments=tuple(
+                transform_expression(builtin_dependencies, symbol_list, arg)
+                for arg in function_call.arguments
+            ),
         )
 
     raise Exception()
 
-def transform_statement(builtin_dependencies, statement):
+def transform_statement(builtin_dependencies, symbol_list, statement):
     return {
         parsing.FurAssignmentStatement: transform_assignment_statement,
         parsing.FurFunctionCallExpression: transform_function_call_expression,
-    }[type(statement)](builtin_dependencies, statement)
+    }[type(statement)](builtin_dependencies, symbol_list, statement)
 
 def transform(program):
     builtins = set()
+    symbol_list = []
 
     c_statements = [
-        transform_statement(builtins, statement) for statement in program.statement_list
+        transform_statement(builtins, symbol_list, statement) for statement in program.statement_list
     ]
 
     standard_libraries = set()
@@ -172,6 +197,7 @@ def transform(program):
         builtins=builtins,
         statements=c_statements,
         standard_libraries=standard_libraries,
+        symbol_list=symbol_list,
     )