X-Git-Url: https://code.kerkeslager.com/?p=sandbox;a=blobdiff_plain;f=stutter.py;h=c2139e56085b39e876027d67e22003c3b301e641;hp=3f1409d1191bed39b39cb007327f067429bb1427;hb=23f27cf41ef383c4e1ef006393fe7bf94bd2d15c;hpb=b3971cab8767d0cfc68ddeabc25a7f8b34a2f44c diff --git a/stutter.py b/stutter.py index 3f1409d..c2139e5 100644 --- a/stutter.py +++ b/stutter.py @@ -6,6 +6,7 @@ To run this file: python stutter.py stutter_code.stt > c_code.c ''' +import itertools import re import string @@ -16,12 +17,70 @@ def is_integer(s_expression): and not s_expression is True \ and not s_expression is False +ESCAPE_CHARACTERS = { + '\\' : '\\', + 'n' : '\n', +} + +def undelimit_string(s): + assert len(s) >= 2 + + delimiter = s[0] + assert delimiter == '"' # This is temporary, " is currently the only delimiter + assert s[-1] == delimiter + + escape_characters = dict(ESCAPE_CHARACTERS) + escape_characters[delimiter] = delimiter + + s = s[1:-1] + + index = 0 + result = '' + + while index < len(s): + ch = s[index] + + if ch == '\\': + index += 1 + + # TODO Handle when it's not a valid escape character + ch = escape_characters[s[index]] + + index += 1 + result += ch + + return result + +TAB_WIDTH = 4 + +def indent(string): + assert isinstance(string, str) + + def indent_line(line): + line = line.rstrip() + + if line == '': + return line + + return ' ' * TAB_WIDTH + line + + return '\n'.join(indent_line(line) for line in string.splitlines()) + # String to s-expressions +class Symbol(object): + def __init__(self, string): + self.string = string + + def __eq__(self, other): + return self.string == other.string + TOKEN = re.compile(r'\s*({})'.format('|'.join('(?P<{}>{})'.format(*token) for token in [ ('open_parenthese', r'\('), ('close_parenthese', r'\)'), + ('identifier', r'[a-z\-]+'), # We can expand this as needed ('integer_literal', r'\d+'), + ('string_literal', r'"(\\"|[^"])*"'), ('unexpected_character', r'.'), ]))) @@ -41,9 +100,15 @@ def parse_all(source): stack[-1].append(tuple(items)) 
items = stack.pop() + elif token.group('identifier'): + items.append(Symbol(token.group('identifier'))) + elif token.group('integer_literal'): items.append(int(token.group('integer_literal'))) + elif token.group('string_literal'): + items.append(undelimit_string(token.group('string_literal'))) + elif token.group('unexpected_character'): raise Exception('Unexpected character {}'.format( token.group('unexpected_character'), @@ -52,7 +117,6 @@ def parse_all(source): else: raise Exception() - if len(stack) > 0: raise Exception('Parenthese opened but not closed') @@ -69,9 +133,9 @@ class CPointerType(CType): self.pointer_to = pointer_to class CArgumentDeclaration(object): - def __init__(self, type, name): - assert isinstance(type, CType) - self.type = type + def __init__(self, _type, name): + assert isinstance(_type, CType) + self._type = _type self.name = name class CExpression(object): @@ -79,13 +143,35 @@ class CExpression(object): class CIntegerLiteralExpression(CExpression): def __init__(self, integer): - assert isinstance(integer, int) + assert is_integer(integer) + self.integer = integer - # Booleans in Python are integers but we don't want them - assert not integer is True - assert not integer is False + def __eq__(self, other): + assert isinstance(other, CIntegerLiteralExpression) + return self.integer == other.integer - self.integer = integer +class CStringLiteralExpression(CExpression): + def __init__(self, string): + assert isinstance(string, str) + self.string = string + + def __eq__(self, other): + assert isinstance(other, CStringLiteralExpression) + return self.string == other.string + +class CVariableExpression(CExpression): + def __init__(self, name): + assert isinstance(name, str) + self.name = name + + def __eq__(self, other): + assert isinstance(other, CVariableExpression) + return self.name == other.name + +class CReferenceExpression(CExpression): + def __init__(self, referee): + assert isinstance(referee, CVariableExpression) + self.referee = 
referee class CFunctionCallExpression(CExpression): def __init__(self, name, arguments): @@ -93,6 +179,10 @@ class CFunctionCallExpression(CExpression): self.name = name self.arguments = arguments + def __eq__(self, other): + assert isinstance(other, CFunctionCallExpression) + return self.name == other.name and self.arguments == other.arguments + class CStatement(object): pass @@ -104,13 +194,28 @@ class CReturnStatement(CStatement): def __init__(self, expression): self.expression = expression +class CDefinitionStatement(CStatement): + def __init__(self, _type, name, definition): + assert isinstance(_type, CType) + assert isinstance(name, str) + assert isinstance(definition, CExpression) + + self._type = _type + self.name = name + self.definition = definition + +class CFunctionBody(object): + def __init__(self, statements): + statements = list(statements) + assert all(isinstance(s, CStatement) for s in statements) + self.statements = statements + class CFunctionDeclaration(object): def __init__(self, return_type, name, argument_declaration_list, body): assert isinstance(return_type, CType) assert isinstance(argument_declaration_list, list) assert all(isinstance(ad, CArgumentDeclaration) for ad in argument_declaration_list) - assert isinstance(body, list) - assert all(isinstance(s, CStatement) for s in body) + assert isinstance(body, CFunctionBody) self.return_type = return_type self.name = name @@ -119,42 +224,90 @@ class CFunctionDeclaration(object): # BEGIN S-expression to C AST layer -def evaluate_to_c(s_expression): +def quote_to_c(s_expression): if is_integer(s_expression): - return CIntegerLiteralExpression(s_expression) + return CFunctionCallExpression( + 'makeObjectPointerFromInteger', + [CIntegerLiteralExpression(s_expression)], + ) - raise Exception('Unable to evaluate expression {} to C'.format(s_expression)) + if isinstance(s_expression, str): + return CFunctionCallExpression( + 'makeObjectPointerFromString', + [CStringLiteralExpression(s_expression)], 
+ ) -def evaluate_all_to_c(s_expressions): - c_expressions = list(map(evaluate_to_c, s_expressions)) - body = list(map(CExpressionStatement, c_expressions[:-1])) + [CReturnStatement(c_expressions[-1])] + if isinstance(s_expression, Symbol): + return CFunctionCallExpression( + 'getSymbol', + [CStringLiteralExpression(s_expression.string)], + ) + + raise Exception('Not implemented for type {}'.format(type(s_expression))) + +def evaluate_application_arguments_to_c( + arguments, + quote_to_c = quote_to_c, + ): - return CFunctionDeclaration( - CType('int'), - 'main', - [ - CArgumentDeclaration(CType('int'), 'argc'), - CArgumentDeclaration(CPointerType(CPointerType(CType('char'))), 'argv'), - ], - body, + if len(arguments) == 0: + return CVariableExpression('NULL') + + return CFunctionCallExpression( + 'c_cons', + ( + quote_to_c(arguments[0]), + evaluate_application_arguments_to_c(arguments[1:]), + ), + ) + +def evaluate_application_to_c( + s_expression, + evaluate_application_arguments_to_c = evaluate_application_arguments_to_c, + ): + + assert isinstance(s_expression, tuple) + if isinstance(s_expression[0], Symbol): + return CFunctionCallExpression( + s_expression[0].string, + ( + CReferenceExpression(CVariableExpression('env')), + evaluate_application_arguments_to_c(s_expression[1:]), + ), ) -# BEGIN C AST to C source layer + raise Exception('Not implemented') -TAB_WIDTH = 2 +def evaluate_to_c( + s_expression, + evaluate_application_to_c = evaluate_application_to_c, + ): -def indent(string): - assert isinstance(string, str) + if isinstance(s_expression, tuple): + return evaluate_application_to_c(s_expression) - def indent_line(line): - line = line.rstrip() + if is_integer(s_expression): + return CIntegerLiteralExpression(s_expression) - if line == '': - return line + if isinstance(s_expression, str): + return CStringLiteralExpression(s_expression) - return ' ' * TAB_WIDTH + line + raise Exception('Unable to evaluate expression {} to C'.format(s_expression)) - return 
'\n'.join(indent_line(line) for line in string.splitlines()) +def evaluate_all_to_c(s_expressions): + c_expressions = list(map(evaluate_to_c, s_expressions)) + + return CFunctionBody(itertools.chain( + [CDefinitionStatement( + CPointerType(CType('Environment')), + 'env', + CVariableExpression('NULL'), + )], + map(CExpressionStatement, c_expressions[:-1]), + [CReturnStatement(c_expressions[-1])], + )) + +# BEGIN C AST to C source layer def generate_pointer_type(pointer_type): assert isinstance(pointer_type, CPointerType) @@ -172,7 +325,10 @@ def generate_type( def generate_argument_declaration(argument_declaration): assert isinstance(argument_declaration, CArgumentDeclaration) - return '{} {}'.format(generate_type(argument_declaration.type), argument_declaration.name) + return '{} {}'.format( + generate_type(argument_declaration._type), + argument_declaration.name, + ) def generate_argument_declaration_list(argument_declarations): return ', '.join(generate_argument_declaration(ad) for ad in argument_declarations) @@ -181,6 +337,41 @@ def generate_integer_literal_expression(expression): assert isinstance(expression, CIntegerLiteralExpression) return str(expression.integer) +C_ESCAPE_SEQUENCES = { + # Taken from https://en.wikipedia.org/wiki/Escape_sequences_in_C + '\x07' : r'\a', + '\x08' : r'\b', + '\x0c' : r'\f', + '\x0a' : r'\n', + '\x0d' : r'\r', + '\x09' : r'\t', + '\x0b' : r'\v', + '\x5c' : r'\\', + '\x27' : r"\'", + '\x22' : r'\"', + '\x3f' : r'\?', +} + +def generate_string_literal_expression(expression): + assert isinstance(expression, CStringLiteralExpression) + + result = '"' + + for ch in expression.string: + result += C_ESCAPE_SEQUENCES.get(ch, ch) + + result += '"' + + return result + +def generate_variable_expression(expression): + assert isinstance(expression, CVariableExpression) + return expression.name + +def generate_reference_expression(expression): + assert isinstance(expression, CReferenceExpression) + return 
'&{}'.format(generate_variable_expression(expression.referee)) + def generate_function_call_expression(expression): assert isinstance(expression, CFunctionCallExpression) return '{}({})'.format( @@ -191,12 +382,24 @@ def generate_function_call_expression(expression): def generate_expression( expression, generate_integer_literal_expression = generate_integer_literal_expression, + generate_string_literal_expression = generate_string_literal_expression, + generate_variable_expression = generate_variable_expression, + generate_reference_expression = generate_reference_expression, generate_function_call_expression = generate_function_call_expression, ): if isinstance(expression, CIntegerLiteralExpression): return generate_integer_literal_expression(expression) + if isinstance(expression, CStringLiteralExpression): + return generate_string_literal_expression(expression) + + if isinstance(expression, CVariableExpression): + return generate_variable_expression(expression) + + if isinstance(expression, CReferenceExpression): + return generate_reference_expression(expression) + if isinstance(expression, CFunctionCallExpression): return generate_function_call_expression(expression) @@ -208,10 +411,18 @@ def generate_expression_statement(statement): def generate_return_statement(statement): return 'return {};'.format(generate_expression(statement.expression)) +def generate_definition_statement(statement): + return '{} {} = {};'.format( + generate_type(statement._type), + statement.name, + generate_expression(statement.definition), + ) + def generate_statement( statement, generate_expression_statement = generate_expression_statement, - generate_return_statement = generate_return_statement): + generate_return_statement = generate_return_statement, + generate_definition_statement = generate_definition_statement): if isinstance(statement, CExpressionStatement): return generate_expression_statement(statement) @@ -219,11 +430,14 @@ def generate_statement( if isinstance(statement, 
CReturnStatement): return generate_return_statement(statement) - raise Exception('Handling for statements of type {} not implemented'.format(type(statement.type))) + if isinstance(statement, CDefinitionStatement): + return generate_definition_statement(statement) + + raise Exception('Handling for statements of type {} not implemented'.format(type(statement))) -def generate_statement_list(statements): - assert all(isinstance(s, CStatement) for s in statements) - return '\n'.join(generate_statement(s) for s in statements) +def generate_function_body(function_body): + assert isinstance(function_body, CFunctionBody) + return '\n'.join(generate_statement(s) for s in function_body.statements) FUNCTION_DEFINITION_TEMPLATE = string.Template( ''' @@ -239,7 +453,286 @@ def generate_function_declaration(function_declaration): return_type = generate_type(function_declaration.return_type), name = function_declaration.name, argument_declaration_list = generate_argument_declaration_list(function_declaration.argument_declaration_list), - body = indent(generate_statement_list(function_declaration.body)), + body = indent(generate_function_body(function_declaration.body)), + ) + +PROGRAM_TEMPLATE = string.Template( +''' +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +struct Object; +typedef struct Object Object; + +enum Type +{ + CELL, + STRING, + SYMBOL +}; +typedef enum Type Type; + +#define MAX_TYPE_STRING_LENGTH 7 + +void typeToString(Type type, char* target) +{ + switch(type) + { + case CELL: + snprintf(target, MAX_TYPE_STRING_LENGTH, "CELL"); + return; + + case STRING: + snprintf(target, MAX_TYPE_STRING_LENGTH, "STRING"); + return; + + case SYMBOL: + snprintf(target, MAX_TYPE_STRING_LENGTH, "%s", "SYMBOL"); + return; + + default: + fprintf(stderr, "ERROR: Unknown type"); + exit(1); + } +} + +struct Cell; +typedef struct Cell Cell; +struct Cell +{ + Object* left; + Object* right; +}; + +struct Environment; +typedef struct Environment Environment; +struct Environment +{ +
char* key; + Object* value; + Environment* next; +}; + +Environment makeEnvironment(char* key, Object* value, Environment* next) +{ + Environment result; + result.key = key; + result.value = value; + result.next = next; + return result; +} + +Environment* makeEnvironmentPointerFromEnvironment(Environment env) +{ + Environment* result = malloc(sizeof(Environment)); + *result = env; + return result; +} + +Environment* makeEnvironmentPointer(char* key, Object* value, Environment* next) +{ + return makeEnvironmentPointerFromEnvironment(makeEnvironment(key, value, next)); +} + +union Instance +{ + Cell cell; + char* string; + char* symbol; +}; +typedef union Instance Instance; + +Instance makeInstanceFromCell(Cell cell) +{ + Instance result; + result.cell = cell; + return result; +} + +Instance makeInstanceFromString(char* string) +{ + Instance result; + result.string = string; + return result; +} + +Instance makeInstanceFromSymbol(char* symbol) +{ + Instance result; + result.symbol = symbol; + return result; +} + +struct Object +{ + Type type; + Instance instance; +}; + +Object makeObject(Type t, Instance i) +{ + Object result; + result.type = t; + result.instance = i; + return result; +} + +Object makeObjectFromCell(Cell cell) +{ + return makeObject(CELL, makeInstanceFromCell(cell)); +} + +Object makeObjectFromString(char* string) +{ + return makeObject(STRING, makeInstanceFromString(string)); +} + +Object makeObjectFromSymbol(char* symbol) +{ + return makeObject(SYMBOL, makeInstanceFromSymbol(symbol)); +} + +Object* makeObjectPointerFromObject(Object o) +{ + Object* result = malloc(sizeof(Object)); + *result = o; + return result; +} + +Object* makeObjectPointerFromCell(Cell cell) +{ + return makeObjectPointerFromObject(makeObjectFromCell(cell)); +} + +Object* makeObjectPointerFromString(char* string) +{ + return makeObjectPointerFromObject(makeObjectFromString(string)); +} + +Object* makeObjectPointerFromSymbol(char* symbol) +{ + return 
makeObjectPointerFromObject(makeObjectFromSymbol(symbol)); +} + +Object* getSymbol(char* symbol) +{ + // This will not always be how this is implemented + return makeObjectPointerFromSymbol(symbol); +} + +Cell makeCell(Object* left, Object* right) +{ + Cell result; + result.left = left; + result.right = right; + return result; +} + +Object* c_cons(Object* left, Object* right) +{ + Cell cell = makeCell(left, right); + return makeObjectPointerFromCell(cell); +} + +void c_print(Object* stutter_string) +{ + if(stutter_string->type != STRING) + { + char typeName[MAX_TYPE_STRING_LENGTH]; + typeToString(stutter_string->type, typeName); + fprintf(stderr, "ERROR: Expected type STRING, got type %s.", typeName); + exit(1); + } + + char* c_string = stutter_string->instance.string; + printf("%s", c_string); +} + +bool c_symbol_equal(char* left, char* right) +{ + return strcmp(left, right) == 0; +} + +Object* c_evaluate_symbol(Environment* env, Object* s) +{ + if(env == NULL) + { + fprintf(stderr, "ERROR: symbol %s not found.", s->instance.symbol); + exit(1); + } + + if(c_symbol_equal(env->key, s->instance.symbol)) + { + return env->value; + } + + return c_evaluate_symbol(env->next, s); +} + +Object* c_evaluate(Environment** env, Object* o) +{ + switch(o->type) + { + case STRING: + return o; + + case SYMBOL: + return c_evaluate_symbol(*env, o); + + default: + break; + } + + char typeName[MAX_TYPE_STRING_LENGTH]; + typeToString(o->type, typeName); + fprintf(stderr, "ERROR: Could not evaluate type %s.", typeName); + exit(1); +} + +int countArgs(Object* args) +{ + if(args == NULL) return 0; + + assert(args->type == CELL); + return 1 + countArgs(args->instance.cell.right); +} + +Object* getArg(int index, Object* args) +{ + if(index == 0) return args->instance.cell.left; + + return getArg(index - 1, args->instance.cell.right); +} + +void print(Environment** parent, Object* args) +{ + assert(countArgs(args) == 1); + Object* stutter_string = c_evaluate(parent, getArg(0, args)); + 
c_print(stutter_string); +} + +void define(Environment** parent, Object* args) +{ + assert(countArgs(args) == 2); + Object* name = getArg(0, args); + Object* value = c_evaluate(parent, getArg(1, args)); + + assert(name->type == SYMBOL); + *parent = makeEnvironmentPointer(name->instance.symbol, value, *parent); +} + +int main(int argc, char** argv) +{ +$body +} +'''.strip()) + +def generate_program(body): + return PROGRAM_TEMPLATE.substitute( + body = body, ) if __name__ == '__main__': @@ -249,5 +742,8 @@ if __name__ == '__main__': with open(source_file_name, 'r') as source_file: source = source_file.read() - result = generate_function_declaration(evaluate_all_to_c(parse_all(source))) + result = generate_program( + indent(generate_function_body(evaluate_all_to_c(parse_all(source)))), + ) + print(result)