Added a description
[sandbox] / stutter.py
1 #!/usr/bin/env python
2
3 '''
4 To run this file:
5
6     python stutter.py stutter_code.stt > c_code.c
7 '''
8
9 import itertools
10 import re
11 import string
12
13 # Utility functions
14
15 def is_integer(s_expression):
16     return isinstance(s_expression, int) \
17         and not s_expression is True \
18         and not s_expression is False
19
20 ESCAPE_CHARACTERS = {
21     '\\'    : '\\',
22     'n'     : '\n',
23 }
24
25 def undelimit_string(s):
26     assert len(s) >= 2
27
28     delimiter = s[0]
29     assert delimiter == '"' # This is temporary, " is currently the only delimiter
30     assert s[-1] == delimiter
31
32     escape_characters = dict(ESCAPE_CHARACTERS)
33     escape_characters[delimiter] = delimiter
34
35     s = s[1:-1]
36
37     index = 0
38     result = ''
39
40     while index < len(s):
41         ch = s[index]
42
43         if ch == '\\':
44             index += 1
45
46             # TODO Handle when it's not a valid escape character
47             ch = escape_characters[s[index]]
48             
49         index += 1
50         result += ch
51
52     return result
53
54 TAB_WIDTH = 4
55
56 def indent(string):
57     assert isinstance(string, str)
58
59     def indent_line(line):
60         line = line.rstrip()
61
62         if line == '':
63             return line
64
65         return ' ' * TAB_WIDTH + line
66
67     return '\n'.join(indent_line(line) for line in string.splitlines())
68
69 # String to s-expressions
70
71 class Symbol(object):
72     def __init__(self, string):
73         self.string = string
74
75     def __eq__(self, other):
76         return self.string == other.string
77
78 TOKEN = re.compile(r'\s*({})'.format('|'.join('(?P<{}>{})'.format(*token) for token in [
79     ('open_parenthese',         r'\('),
80     ('close_parenthese',        r'\)'),
81     ('identifier',              r'[a-z\-]+'), # We can expand this as needed
82     ('integer_literal',         r'\d+'),
83     ('string_literal',          r'"(\\"|[^"])*"'),
84     ('unexpected_character',    r'.'),
85 ])))
86
87 def parse_all(source):
88     stack = []
89     items = []
90
91     for token in TOKEN.finditer(source):
92         if token.group('open_parenthese'):
93             stack.append(items)
94             items = []
95
96         elif token.group('close_parenthese'):
97             if len(stack) == 0:
98                 raise Exception('Parenthese closed but not opened')
99
100             stack[-1].append(tuple(items))
101             items = stack.pop()
102
103         elif token.group('identifier'):
104             items.append(Symbol(token.group('identifier')))
105
106         elif token.group('integer_literal'):
107             items.append(int(token.group('integer_literal')))
108
109         elif token.group('string_literal'):
110             items.append(undelimit_string(token.group('string_literal')))
111
112         elif token.group('unexpected_character'):
113             raise Exception('Unexpected character {}'.format(
114                 token.group('unexpected_character'),
115             ))
116
117         else:
118             raise Exception()
119
120     if len(stack) > 0:
121         raise Exception('Parenthese opened but not closed')
122
123     return items
124
125 # C AST Objects
126
127 class CType(object):
128     def __init__(self, name):
129         self.name = name
130
131 class CPointerType(CType):
132     def __init__(self, pointer_to):
133         self.pointer_to = pointer_to
134
135 class CArgumentDeclaration(object):
136     def __init__(self, _type, name):
137         assert isinstance(_type, CType)
138         self._type = _type
139         self.name = name
140
141 class CExpression(object):
142     pass
143
144 class CIntegerLiteralExpression(CExpression):
145     def __init__(self, integer):
146         assert is_integer(integer)
147         self.integer = integer
148
149     def __eq__(self, other):
150         assert isinstance(other, CIntegerLiteralExpression)
151         return self.integer == other.integer
152
153 class CStringLiteralExpression(CExpression):
154     def __init__(self, string):
155         assert isinstance(string, str)
156         self.string = string
157
158     def __eq__(self, other):
159         assert isinstance(other, CStringLiteralExpression)
160         return self.string == other.string
161
162 class CVariableExpression(CExpression):
163     def __init__(self, name):
164         assert isinstance(name, str)
165         self.name = name
166
167     def __eq__(self, other):
168         assert isinstance(other, CVariableExpression)
169         return self.name == other.name
170
171 class CReferenceExpression(CExpression):
172     def __init__(self, referee):
173         assert isinstance(referee, CVariableExpression)
174         self.referee = referee
175
176 class CFunctionCallExpression(CExpression):
177     def __init__(self, name, arguments):
178         assert all(isinstance(argument, CExpression) for argument in arguments)
179         self.name = name
180         self.arguments = arguments
181
182     def __eq__(self, other):
183         assert isinstance(other, CFunctionCallExpression)
184         return self.name == other.name and self.arguments == other.arguments
185
186 class CStatement(object):
187     pass
188
189 class CExpressionStatement(CStatement):
190     def __init__(self, expression):
191         self.expression = expression
192
193 class CReturnStatement(CStatement):
194     def __init__(self, expression):
195         self.expression = expression
196
197 class CDefinitionStatement(CStatement):
198     def __init__(self, _type, name, definition):
199         assert isinstance(_type, CType)
200         assert isinstance(name, str)
201         assert isinstance(definition, CExpression)
202
203         self._type = _type
204         self.name = name
205         self.definition = definition
206
207 class CFunctionBody(object):
208     def __init__(self, statements):
209         statements = list(statements)
210         assert all(isinstance(s, CStatement) for s in statements)
211         self.statements = statements
212
213 class CFunctionDeclaration(object):
214     def __init__(self, return_type, name, argument_declaration_list, body):
215         assert isinstance(return_type, CType)
216         assert isinstance(argument_declaration_list, list)
217         assert all(isinstance(ad, CArgumentDeclaration) for ad in argument_declaration_list)
218         assert isinstance(body, CFunctionBody)
219
220         self.return_type = return_type
221         self.name = name
222         self.argument_declaration_list = argument_declaration_list
223         self.body = body
224
225 # BEGIN S-expression to C AST layer
226
227 def quote_to_c(s_expression):
228     if is_integer(s_expression):
229         return CFunctionCallExpression(
230             'makeObjectPointerFromInteger',
231             [CIntegerLiteralExpression(s_expression)],
232         )
233
234     if isinstance(s_expression, str):
235         return CFunctionCallExpression(
236             'makeObjectPointerFromString',
237             [CStringLiteralExpression(s_expression)],
238         )
239
240     if isinstance(s_expression, Symbol):
241         return CFunctionCallExpression(
242             'getSymbol',
243             [CStringLiteralExpression(s_expression.string)],
244         )
245
246     raise Exception('Not implemented for type {}'.format(type(s_expression)))
247
248 def evaluate_application_arguments_to_c(
249         arguments,
250         quote_to_c = quote_to_c,
251     ):
252     
253     if len(arguments) == 0:
254         return CVariableExpression('NULL')
255
256     return CFunctionCallExpression(
257         'c_cons',
258         (
259             quote_to_c(arguments[0]),
260             evaluate_application_arguments_to_c(arguments[1:]),
261         ),
262     )
263
264 def evaluate_application_to_c(
265         s_expression,
266         evaluate_application_arguments_to_c = evaluate_application_arguments_to_c,
267     ):
268
269     assert isinstance(s_expression, tuple)
270     if isinstance(s_expression[0], Symbol):
271         return CFunctionCallExpression(
272             s_expression[0].string,
273             (
274                 CReferenceExpression(CVariableExpression('env')),
275                 evaluate_application_arguments_to_c(s_expression[1:]),
276             ),
277         )
278
279     raise Exception('Not implemented')
280
281 def evaluate_to_c(
282         s_expression,
283         evaluate_application_to_c = evaluate_application_to_c,
284     ):
285
286     if isinstance(s_expression, tuple):
287         return evaluate_application_to_c(s_expression)
288
289     if is_integer(s_expression):
290         return CIntegerLiteralExpression(s_expression)
291
292     if isinstance(s_expression, str):
293         return CStringLiteralExpression(s_expression)
294
295     raise Exception('Unable to evaluate expression {} to C'.format(s_expression))
296
297 def evaluate_all_to_c(s_expressions):
298     c_expressions = list(map(evaluate_to_c, s_expressions))
299
300     return CFunctionBody(itertools.chain(
301         [CDefinitionStatement(
302             CPointerType(CType('Environment')),
303             'env',
304             CVariableExpression('NULL'),
305         )],
306         map(CExpressionStatement, c_expressions[:-1]),
307         [CReturnStatement(c_expressions[-1])],
308     ))
309     
310 # BEGIN C AST to C source layer
311
312 def generate_pointer_type(pointer_type):
313     assert isinstance(pointer_type, CPointerType)
314     return '{}*'.format(generate_type(pointer_type.pointer_to))
315
316 def generate_type(
317         type,
318         generate_pointer_type = generate_pointer_type):
319     assert isinstance(type, CType)
320
321     if isinstance(type, CPointerType):
322         return generate_pointer_type(type)
323
324     return type.name
325
326 def generate_argument_declaration(argument_declaration):
327     assert isinstance(argument_declaration, CArgumentDeclaration)
328     return '{} {}'.format(
329         generate_type(argument_declaration._type),
330         argument_declaration.name,
331     )
332
333 def generate_argument_declaration_list(argument_declarations):
334     return ', '.join(generate_argument_declaration(ad) for ad in argument_declarations)
335
336 def generate_integer_literal_expression(expression):
337     assert isinstance(expression, CIntegerLiteralExpression)
338     return str(expression.integer)
339
340 C_ESCAPE_SEQUENCES = {
341     # Taken from https://en.wikipedia.org/wiki/Escape_sequences_in_C
342     '\x07'  : r'\a',
343     '\x08'  : r'\b',
344     '\x0c'  : r'\f',
345     '\x0a'  : r'\n',
346     '\x0d'  : r'\r',
347     '\x09'  : r'\t',
348     '\x0b'  : r'\v',
349     '\x5c'  : r'\\',
350     '\x27'  : r"\'",
351     '\x22'  : r'\"',
352     '\x3f'  : r'\?',
353 }
354
355 def generate_string_literal_expression(expression):
356     assert isinstance(expression, CStringLiteralExpression)
357
358     result = '"'
359
360     for ch in expression.string:
361         result += C_ESCAPE_SEQUENCES.get(ch, ch)
362
363     result += '"'
364
365     return result
366
367 def generate_variable_expression(expression):
368     assert isinstance(expression, CVariableExpression)
369     return expression.name
370
371 def generate_reference_expression(expression):
372     assert isinstance(expression, CReferenceExpression)
373     return '&{}'.format(generate_variable_expression(expression.referee))
374
375 def generate_function_call_expression(expression):
376     assert isinstance(expression, CFunctionCallExpression)
377     return '{}({})'.format(
378         expression.name,
379         ', '.join(generate_expression(e) for e in expression.arguments),
380     )
381
382 def generate_expression(
383         expression,
384         generate_integer_literal_expression = generate_integer_literal_expression,
385         generate_string_literal_expression = generate_string_literal_expression,
386         generate_variable_expression = generate_variable_expression,
387         generate_reference_expression = generate_reference_expression,
388         generate_function_call_expression = generate_function_call_expression,
389         ):
390
391     if isinstance(expression, CIntegerLiteralExpression):
392         return generate_integer_literal_expression(expression)
393
394     if isinstance(expression, CStringLiteralExpression):
395         return generate_string_literal_expression(expression)
396
397     if isinstance(expression, CVariableExpression):
398         return generate_variable_expression(expression)
399
400     if isinstance(expression, CReferenceExpression):
401         return generate_reference_expression(expression)
402
403     if isinstance(expression, CFunctionCallExpression):
404         return generate_function_call_expression(expression)
405
406     raise Exception('Expression type {} not implemented'.format(type(expression)))
407
408 def generate_expression_statement(statement):
409     return '{};'.format(generate_expression(statement.expression))
410
411 def generate_return_statement(statement):
412     return 'return {};'.format(generate_expression(statement.expression))
413
414 def generate_definition_statement(statement):
415     return '{} {} = {};'.format(
416         generate_type(statement._type),
417         statement.name,
418         generate_expression(statement.definition),
419     )
420
421 def generate_statement(
422         statement,
423         generate_expression_statement = generate_expression_statement,
424         generate_return_statement = generate_return_statement,
425         generate_definition_statement = generate_definition_statement):
426
427     if isinstance(statement, CExpressionStatement):
428         return generate_expression_statement(statement)
429
430     if isinstance(statement, CReturnStatement):
431         return generate_return_statement(statement)
432
433     if isinstance(statement, CDefinitionStatement):
434         return generate_definition_statement(statement)
435
436     raise Exception('Handling for statements of type {} not implemented'.format(type(statement)))
437
438 def generate_function_body(function_body):
439     assert isinstance(function_body, CFunctionBody)
440     return '\n'.join(generate_statement(s) for s in function_body.statements)
441
442 FUNCTION_DEFINITION_TEMPLATE = string.Template(
443 '''
444 $return_type $name($argument_declaration_list)
445 {
446 $body
447 }
448 '''.strip())
449
450 def generate_function_declaration(function_declaration):
451     assert isinstance(function_declaration, CFunctionDeclaration)
452     return FUNCTION_DEFINITION_TEMPLATE.substitute(
453         return_type = generate_type(function_declaration.return_type),
454         name = function_declaration.name,
455         argument_declaration_list = generate_argument_declaration_list(function_declaration.argument_declaration_list),
456         body = indent(generate_function_body(function_declaration.body)),
457     )
458
459 PROGRAM_TEMPLATE = string.Template(
460 '''
461 #include <assert.h>
462 #include <stdbool.h>
463 #include <stdio.h>
464 #include <stdlib.h>
465 #include <string.h>
466
467 struct Object;
468 typedef struct Object Object;
469
470 enum Type
471 {
472     CELL,
473     STRING,
474     SYMBOL
475 };
476 typedef enum Type Type;
477
478 #define MAX_TYPE_STRING_LENGTH 7
479
480 void typeToString(Type type, char* target)
481 {
482     switch(type)
483     {
484         case CELL:
485             snprintf(target, MAX_TYPE_STRING_LENGTH, "CELL");
486             return;
487
488         case STRING:
489             snprintf(target, MAX_TYPE_STRING_LENGTH, "STRING");
490             return;
491
492         case SYMBOL:
493             snprintf(target, MAX_TYPE_STRING_LENGTH, "%s", "SYMBOL");
494             return;
495
496         default:
497             fprintf(stderr, "ERROR: Unknown type");
498             exit(1);
499     }
500 }
501
502 struct Cell;
503 typedef struct Cell Cell;
504 struct Cell
505 {
506     Object* left;
507     Object* right;
508 };
509
510 struct Environment;
511 typedef struct Environment Environment;
512 struct Environment
513 {
514     char* key;
515     Object* value;
516     Environment* next;
517 };
518
519 Environment makeEnvironment(char* key, Object* value, Environment* next)
520 {
521     Environment result;
522     result.key = key;
523     result.value = value;
524     result.next = next;
525     return result;
526 }
527
528 Environment* makeEnvironmentPointerFromEnvironment(Environment env)
529 {
530     Environment* result = malloc(sizeof(Environment));
531     *result = env;
532     return result;
533 }
534
535 Environment* makeEnvironmentPointer(char* key, Object* value, Environment* next)
536 {
537     return makeEnvironmentPointerFromEnvironment(makeEnvironment(key, value, next));
538 }
539
540 union Instance
541 {
542     Cell cell;
543     char* string;
544     char* symbol;
545 };
546 typedef union Instance Instance;
547
548 Instance makeInstanceFromCell(Cell cell)
549 {
550     Instance result;
551     result.cell = cell;
552     return result;
553 }
554
555 Instance makeInstanceFromString(char* string)
556 {
557     Instance result;
558     result.string = string;
559     return result;
560 }
561
562 Instance makeInstanceFromSymbol(char* symbol)
563 {
564     Instance result;
565     result.symbol = symbol;
566     return result;
567 }
568
569 struct Object
570 {
571     Type type;
572     Instance instance;
573 };
574
575 Object makeObject(Type t, Instance i)
576 {
577     Object result;
578     result.type = t;
579     result.instance = i;
580     return result;
581 }
582
583 Object makeObjectFromCell(Cell cell)
584 {
585     return makeObject(CELL, makeInstanceFromCell(cell));
586 }
587
588 Object makeObjectFromString(char* string)
589 {
590     return makeObject(STRING, makeInstanceFromString(string));
591 }
592
593 Object makeObjectFromSymbol(char* symbol)
594 {
595     return makeObject(SYMBOL, makeInstanceFromSymbol(symbol));
596 }
597
598 Object* makeObjectPointerFromObject(Object o)
599 {
600     Object* result = malloc(sizeof(Object));
601     *result = o;
602     return result;
603 }
604
605 Object* makeObjectPointerFromCell(Cell cell)
606 {
607     return makeObjectPointerFromObject(makeObjectFromCell(cell));
608 }
609
610 Object* makeObjectPointerFromString(char* string)
611 {
612     return makeObjectPointerFromObject(makeObjectFromString(string));
613 }
614
615 Object* makeObjectPointerFromSymbol(char* symbol)
616 {
617     return makeObjectPointerFromObject(makeObjectFromSymbol(symbol));
618 }
619
620 Object* getSymbol(char* symbol)
621 {
622     // This will not always be how this is implemented
623     return makeObjectPointerFromSymbol(symbol);
624 }
625
626 Cell makeCell(Object* left, Object* right)
627 {
628     Cell result;
629     result.left = left;
630     result.right = right;
631     return result;
632 }
633
634 Object* c_cons(Object* left, Object* right)
635 {
636     Cell cell = makeCell(left, right);
637     return makeObjectPointerFromCell(cell);
638 }
639
640 void c_print(Object* stutter_string)
641 {
642     if(stutter_string->type != STRING)
643     {
644         char typeName[MAX_TYPE_STRING_LENGTH];
645         typeToString(stutter_string->type, typeName);
646         fprintf(stderr, "ERROR: Expected type STRING, got type %s.", typeName);
647         exit(1);
648     }
649
650     char* c_string = stutter_string->instance.string;
651     printf("%s", c_string);
652 }
653
654 bool c_symbol_equal(char* left, char* right)
655 {
656     return strcmp(left, right) == 0;
657 }
658
659 Object* c_evaluate_symbol(Environment* env, Object* s)
660 {
661     if(env == NULL)
662     {
663         fprintf(stderr, "ERROR: symbol %s not found.", s->instance.symbol);
664         exit(1);
665     }
666
667     if(c_symbol_equal(env->key, s->instance.symbol))
668     {
669         return env->value;
670     }
671
672     return c_evaluate_symbol(env->next, s);
673 }
674
675 Object* c_evaluate(Environment** env, Object* o)
676 {
677     switch(o->type)
678     {
679         case STRING:
680             return o;
681
682         case SYMBOL:
683             return c_evaluate_symbol(*env, o);
684
685         default:
686             break;
687     }
688
689     char typeName[MAX_TYPE_STRING_LENGTH];
690     typeToString(o->type, typeName);
691     fprintf(stderr, "ERROR: Could not evaluate type %s.", typeName);
692     exit(1);
693 }
694
695 int countArgs(Object* args)
696 {
697     if(args == NULL) return 0;
698
699     assert(args->type == CELL);
700     return 1 + countArgs(args->instance.cell.right);
701 }
702
703 Object* getArg(int index, Object* args)
704 {
705     if(index == 0) return args->instance.cell.left;
706
707     return getArg(index - 1, args->instance.cell.right);
708 }
709
710 void print(Environment** parent, Object* args)
711 {
712     assert(countArgs(args) == 1);
713     Object* stutter_string = c_evaluate(parent, getArg(0, args));
714     c_print(stutter_string);
715 }
716
717 void define(Environment** parent, Object* args)
718 {
719     assert(countArgs(args) == 2);
720     Object* name = getArg(0, args);
721     Object* value = c_evaluate(parent, getArg(1, args));
722
723     assert(name->type == SYMBOL);
724     *parent = makeEnvironmentPointer(name->instance.symbol, value, *parent);
725 }
726
727 int main(int argc, char** argv)
728 {
729 $body
730 }
731 '''.strip())
732
733 def generate_program(body):
734     return PROGRAM_TEMPLATE.substitute(
735         body = body,
736     )
737
738 if __name__ == '__main__':
739     import sys
740     source_file_name = sys.argv[1]
741
742     with open(source_file_name, 'r') as source_file:
743         source = source_file.read()
744
745     result = generate_program(
746         indent(generate_function_body(evaluate_all_to_c(parse_all(source)))),
747     )
748
749     print(result)