3e4dd2c026060d93ebe4a94f19618e332b4d5f68
[sandbox] / stutter.py
1 #!/usr/bin/env python
2
3 '''
4 To run this file:
5
6     python stutter.py stutter_code.stt > c_code.c
7 '''
8
9 import itertools
10 import re
11 import string
12
13 # Utility functions
14
def is_integer(s_expression):
    '''
    Return True when s_expression is an int that is not a bool.

    bool is a subclass of int, so True and False have to be ruled out
    explicitly.
    '''
    if isinstance(s_expression, bool):
        return False

    return isinstance(s_expression, int)
19
# Maps the character following a backslash to the character it stands for.
# The string delimiter is added per call in undelimit_string.
ESCAPE_CHARACTERS = {
    '\\'    : '\\',
    'n'     : '\n',
}

def undelimit_string(s):
    '''
    Strip the delimiters from a string literal token and resolve its
    backslash escape sequences.

    s is the literal including both delimiters, e.g. '"a\\nb"'. Returns the
    string value the literal denotes.

    Raises ValueError when a backslash is followed by a character that is
    not a known escape, or when the literal ends in the middle of an escape
    sequence (the tokenizer can produce such a token, e.g. "abc\\").
    '''
    assert len(s) >= 2

    delimiter = s[0]
    assert delimiter == '"' # This is temporary, " is currently the only delimiter
    assert s[-1] == delimiter

    escape_characters = dict(ESCAPE_CHARACTERS)
    escape_characters[delimiter] = delimiter

    characters = iter(s[1:-1])
    result = []

    for ch in characters:
        if ch == '\\':
            # Consume the character after the backslash as part of the escape
            escaped = next(characters, None)

            if escaped is None:
                raise ValueError(
                    'String literal {} ends with an unfinished escape sequence'.format(s),
                )

            if escaped not in escape_characters:
                raise ValueError(
                    'Invalid escape sequence \\{} in string literal {}'.format(escaped, s),
                )

            ch = escape_characters[escaped]

        result.append(ch)

    return ''.join(result)
53
# Number of spaces added in front of each non-blank line by indent()
TAB_WIDTH = 4

def indent(string):
    '''
    Indent every line of string by TAB_WIDTH spaces.

    Trailing whitespace is removed from each line, and lines that are blank
    after stripping are left empty rather than padded. The lines are
    re-joined with newlines, without a trailing newline.
    '''
    assert isinstance(string, str)

    prefix = ' ' * TAB_WIDTH
    indented = []

    for line in string.splitlines():
        stripped = line.rstrip()
        indented.append(prefix + stripped if stripped else stripped)

    return '\n'.join(indented)
68
69 # String to s-expressions
70
class Symbol(object):
    '''
    An identifier read from source, wrapped so that it is distinguishable
    from a string literal (which is represented as a plain str).
    '''

    def __init__(self, string):
        self.string = string

    def __eq__(self, other):
        # Comparing against a non-Symbol must not blow up on other.string
        if not isinstance(other, Symbol):
            return NotImplemented

        return self.string == other.string

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out
        result = self.__eq__(other)

        if result is NotImplemented:
            return result

        return not result

    def __hash__(self):
        # Defining __eq__ alone makes the class unhashable on Python 3;
        # keep the hash consistent with equality.
        return hash(self.string)

    def __repr__(self):
        return 'Symbol({!r})'.format(self.string)
77
# One token per match, with leading whitespace skipped. Exactly one of the
# named groups is set on each match; unexpected_character is the catch-all.
TOKEN = re.compile(r'\s*({})'.format('|'.join('(?P<{}>{})'.format(*token) for token in [
    ('open_parenthese',         r'\('),
    ('close_parenthese',        r'\)'),
    ('identifier',              r'[a-z]+'), # We can expand this as needed
    ('integer_literal',         r'\d+'),
    ('string_literal',          r'"(\\"|[^"])*"'),
    ('unexpected_character',    r'.'),
])))

def parse_all(source):
    '''
    Parse source text into a list of top-level s-expressions.

    Parenthesized groups become tuples, identifiers become Symbol objects,
    integer literals become ints and string literals become strs (with
    delimiters and escapes resolved by undelimit_string).

    Raises Exception on unbalanced parentheses or on a character that does
    not start any token.
    '''
    stack = []
    items = []

    for token in TOKEN.finditer(source):
        if token.group('open_parenthese'):
            # Start collecting a nested list; remember the enclosing one
            stack.append(items)
            items = []

        elif token.group('close_parenthese'):
            if len(stack) == 0:
                raise Exception('Parenthese closed but not opened')

            stack[-1].append(tuple(items))
            items = stack.pop()

        elif token.group('identifier'):
            items.append(Symbol(token.group('identifier')))

        elif token.group('integer_literal'):
            items.append(int(token.group('integer_literal')))

        elif token.group('string_literal'):
            items.append(undelimit_string(token.group('string_literal')))

        elif token.group('unexpected_character'):
            character = token.group('unexpected_character')

            if character.isspace():
                # Whitespace only reaches the catch-all when no token follows
                # it, i.e. trailing spaces/tabs at the end of the source.
                # That is not an error.
                continue

            raise Exception('Unexpected character {}'.format(
                character,
            ))

        else:
            raise Exception()

    if len(stack) > 0:
        raise Exception('Parenthese opened but not closed')

    return items
124
125 # C AST Objects
126
class CType(object):
    '''A C type identified by name; generate_type emits the name verbatim.'''

    def __init__(self, name):
        # Text of the type as it should appear in the generated source
        self.name = name
130
class CPointerType(CType):
    '''
    A C pointer type, described by the type it points to.

    NOTE: CType.__init__ is deliberately not invoked here, so instances have
    no name attribute; generate_type handles pointers before reading name.
    '''

    def __init__(self, pointer_to):
        # The pointee type; rendered by generate_pointer_type followed by '*'
        self.pointer_to = pointer_to
134
class CArgumentDeclaration(object):
    '''One parameter in a C function signature: a CType plus a name.'''

    def __init__(self, type, name):
        assert isinstance(type, CType)

        self.name = name
        self.type = type
140
class CExpression(object):
    '''
    Base class of all C expression nodes.

    Subclasses define value equality. Comparing against an object of a
    different class yields "not equal" rather than raising, so that
    containers of mixed expression kinds can be compared safely.
    '''

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out once here
        result = self.__eq__(other)

        if result is NotImplemented:
            return result

        return not result

class CIntegerLiteralExpression(CExpression):
    '''An integer literal; integer must satisfy is_integer.'''

    def __init__(self, integer):
        assert is_integer(integer)
        self.integer = integer

    def __eq__(self, other):
        if not isinstance(other, CIntegerLiteralExpression):
            return NotImplemented

        return self.integer == other.integer

class CStringLiteralExpression(CExpression):
    '''A string literal holding the unescaped str value.'''

    def __init__(self, string):
        assert isinstance(string, str)
        self.string = string

    def __eq__(self, other):
        if not isinstance(other, CStringLiteralExpression):
            return NotImplemented

        return self.string == other.string

class CVariableExpression(CExpression):
    '''A reference to a C identifier by name.'''

    def __init__(self, name):
        assert isinstance(name, str)
        self.name = name

    def __eq__(self, other):
        if not isinstance(other, CVariableExpression):
            return NotImplemented

        return self.name == other.name

class CFunctionCallExpression(CExpression):
    '''A call of the C function called name with a sequence of CExpression arguments.'''

    def __init__(self, name, arguments):
        assert all(isinstance(argument, CExpression) for argument in arguments)
        self.name = name
        self.arguments = arguments

    def __eq__(self, other):
        if not isinstance(other, CFunctionCallExpression):
            return NotImplemented

        # Argument sequences are compared as-is, so a list never equals a tuple
        return self.name == other.name and self.arguments == other.arguments
180
class CStatement(object):
    '''Base class of all C statement nodes; carries no state of its own.'''
183
class CExpressionStatement(CStatement):
    '''A statement that evaluates an expression and ignores its value.'''

    def __init__(self, expression):
        # Rendered by generate_expression_statement as "<expression>;"
        self.expression = expression
187
class CReturnStatement(CStatement):
    '''A return statement carrying the expression to return.'''

    def __init__(self, expression):
        # Rendered by generate_return_statement as "return <expression>;"
        self.expression = expression
191
class CFunctionBody(object):
    '''The ordered statements that make up the body of a C function.'''

    def __init__(self, statements):
        # Materialize first: callers may hand in a one-shot iterator, and it
        # is walked once for validation and again during generation.
        materialized = list(statements)
        assert all(isinstance(statement, CStatement) for statement in materialized)

        self.statements = materialized
197
class CFunctionDeclaration(object):
    '''
    A complete C function: return type, name, parameter list and body.

    return_type must be a CType, argument_declaration_list a list of
    CArgumentDeclaration and body a CFunctionBody.
    '''

    def __init__(self, return_type, name, argument_declaration_list, body):
        assert isinstance(return_type, CType)
        assert isinstance(argument_declaration_list, list)
        assert all(
            isinstance(declaration, CArgumentDeclaration)
            for declaration in argument_declaration_list
        )
        assert isinstance(body, CFunctionBody)

        self.body = body
        self.argument_declaration_list = argument_declaration_list
        self.name = name
        self.return_type = return_type
209
210 # BEGIN S-expression to C AST layer
211
def quote_to_c(s_expression):
    '''
    Build the C expression that constructs a runtime object for a quoted
    (unevaluated) s-expression.

    Only integers and strings are supported; anything else raises
    Exception('Not implemented').
    '''
    if is_integer(s_expression):
        constructor = 'makeObjectPointerFromInteger'
        literal = CIntegerLiteralExpression(s_expression)

    elif isinstance(s_expression, str):
        constructor = 'makeObjectPointerFromString'
        literal = CStringLiteralExpression(s_expression)

    else:
        raise Exception('Not implemented')

    return CFunctionCallExpression(constructor, [literal])
226
def evaluate_application_arguments_to_c(
        arguments,
        quote_to_c = quote_to_c,
    ):
    '''
    Build the C expression for an application's argument list.

    The arguments become a chain of nested c_cons calls terminated by NULL,
    e.g. c_cons(a, c_cons(b, NULL)). Each argument is converted with
    quote_to_c, which may be overridden by the caller; the override applies
    to every argument, not just the first.
    '''
    # Convert left to right so the first unsupported argument is the one
    # that gets reported.
    quoted_arguments = [quote_to_c(argument) for argument in arguments]

    # Assemble from the tail: the innermost cons holds the last argument.
    result = CVariableExpression('NULL')

    for quoted in reversed(quoted_arguments):
        result = CFunctionCallExpression('c_cons', (quoted, result))

    return result
242
def evaluate_application_to_c(
        s_expression,
        evaluate_application_arguments_to_c = evaluate_application_arguments_to_c,
    ):
    '''
    Build the C call for an application s-expression such as (print "x").

    The first element must be a Symbol naming the function; the remaining
    elements are packed into a single argument list expression.

    Raises Exception for the empty application () and for any application
    whose first element is not a Symbol.
    '''
    assert isinstance(s_expression, tuple)

    if len(s_expression) == 0:
        # "()" parses to an empty tuple; there is no function to call
        raise Exception('Unable to evaluate an empty application')

    if isinstance(s_expression[0], Symbol):
        return CFunctionCallExpression(
            s_expression[0].string,
            (evaluate_application_arguments_to_c(s_expression[1:]),),
        )

    raise Exception('Not implemented')
256
def evaluate_to_c(
        s_expression,
        evaluate_application_to_c = evaluate_application_to_c,
    ):
    '''
    Build the C expression that evaluates one s-expression.

    Tuples are applications, integers and strings become literals; any
    other value raises an Exception.
    '''
    if isinstance(s_expression, tuple):
        c_expression = evaluate_application_to_c(s_expression)

    elif is_integer(s_expression):
        c_expression = CIntegerLiteralExpression(s_expression)

    elif isinstance(s_expression, str):
        c_expression = CStringLiteralExpression(s_expression)

    else:
        raise Exception('Unable to evaluate expression {} to C'.format(s_expression))

    return c_expression
272
def evaluate_all_to_c(s_expressions):
    '''
    Build a CFunctionBody from a sequence of top-level s-expressions.

    Every expression but the last becomes an expression statement; the last
    one becomes the return statement. An empty sequence produces an empty
    body instead of failing.
    '''
    c_expressions = [evaluate_to_c(s_expression) for s_expression in s_expressions]

    if not c_expressions:
        # Nothing to evaluate and nothing to return
        return CFunctionBody([])

    statements = [CExpressionStatement(e) for e in c_expressions[:-1]]
    statements.append(CReturnStatement(c_expressions[-1]))

    return CFunctionBody(statements)
280     
281 # BEGIN C AST to C source layer
282
def generate_pointer_type(pointer_type):
    '''Return the C source for a CPointerType: the pointee type followed by "*".'''
    assert isinstance(pointer_type, CPointerType)

    pointee = generate_type(pointer_type.pointer_to)
    return '{}*'.format(pointee)
286
def generate_type(
        type,
        generate_pointer_type = generate_pointer_type):
    '''
    Return the C source for a CType.

    Pointer types are delegated to generate_pointer_type; every other type
    is rendered as its name.
    '''
    assert isinstance(type, CType)

    if not isinstance(type, CPointerType):
        return type.name

    return generate_pointer_type(type)
296
def generate_argument_declaration(argument_declaration):
    '''Return "<type> <name>" for a single CArgumentDeclaration.'''
    assert isinstance(argument_declaration, CArgumentDeclaration)

    type_source = generate_type(argument_declaration.type)
    return '{} {}'.format(type_source, argument_declaration.name)
300
def generate_argument_declaration_list(argument_declarations):
    '''Return the comma-separated parameter list for a function signature.'''
    rendered = [
        generate_argument_declaration(declaration)
        for declaration in argument_declarations
    ]
    return ', '.join(rendered)
303
def generate_integer_literal_expression(expression):
    '''Return the decimal text of a CIntegerLiteralExpression.'''
    assert isinstance(expression, CIntegerLiteralExpression)

    value = expression.integer
    return str(value)
307
# Characters that must be written as an escape sequence inside a C string
# literal, mapped to that escape sequence.
C_ESCAPE_SEQUENCES = {
    # Taken from https://en.wikipedia.org/wiki/Escape_sequences_in_C
    '\x07'  : r'\a',
    '\x08'  : r'\b',
    '\x0c'  : r'\f',
    '\x0a'  : r'\n',
    '\x0d'  : r'\r',
    '\x09'  : r'\t',
    '\x0b'  : r'\v',
    '\x5c'  : r'\\',
    '\x27'  : r"\'",
    '\x22'  : r'\"',
    '\x3f'  : r'\?',
}

def generate_string_literal_expression(expression):
    '''
    Return the double-quoted C string literal for a CStringLiteralExpression.

    Characters listed in C_ESCAPE_SEQUENCES are replaced by their escape
    sequence; all other characters are emitted unchanged.
    '''
    assert isinstance(expression, CStringLiteralExpression)

    escaped = ''.join(C_ESCAPE_SEQUENCES.get(ch, ch) for ch in expression.string)

    return '"' + escaped + '"'
334
def generate_variable_expression(expression):
    '''Return the identifier text of a CVariableExpression.'''
    assert isinstance(expression, CVariableExpression)

    identifier = expression.name
    return identifier
338
def generate_function_call_expression(expression):
    '''Return "<name>(<arg>, <arg>, ...)" for a CFunctionCallExpression.'''
    assert isinstance(expression, CFunctionCallExpression)

    argument_sources = [
        generate_expression(argument)
        for argument in expression.arguments
    ]

    return '{}({})'.format(expression.name, ', '.join(argument_sources))
345
def generate_expression(
        expression,
        generate_integer_literal_expression = generate_integer_literal_expression,
        generate_string_literal_expression = generate_string_literal_expression,
        generate_variable_expression = generate_variable_expression,
        generate_function_call_expression = generate_function_call_expression,
        ):
    '''
    Return the C source for any supported CExpression.

    Dispatches on the expression's class to the matching generator, each of
    which can be overridden through the keyword parameters. Raises
    Exception for an unsupported expression type.
    '''
    # Checked in order; the first matching class wins
    generators = (
        (CIntegerLiteralExpression, generate_integer_literal_expression),
        (CStringLiteralExpression, generate_string_literal_expression),
        (CVariableExpression, generate_variable_expression),
        (CFunctionCallExpression, generate_function_call_expression),
    )

    for expression_class, generate in generators:
        if isinstance(expression, expression_class):
            return generate(expression)

    raise Exception('Expression type {} not implemented'.format(type(expression)))
367
def generate_expression_statement(statement):
    '''Return the statement's expression followed by a semicolon.'''
    expression_source = generate_expression(statement.expression)
    return '{};'.format(expression_source)
370
def generate_return_statement(statement):
    '''Return "return <expression>;" for the statement's expression.'''
    expression_source = generate_expression(statement.expression)
    return 'return {};'.format(expression_source)
373
def generate_statement(
        statement,
        generate_expression_statement = generate_expression_statement,
        generate_return_statement = generate_return_statement):
    '''
    Return the C source for a CStatement.

    Dispatches on the statement's class; the generators can be overridden
    through the keyword parameters. Raises Exception for a statement type
    that has no generator.
    '''
    if isinstance(statement, CExpressionStatement):
        return generate_expression_statement(statement)

    if isinstance(statement, CReturnStatement):
        return generate_return_statement(statement)

    # Report the class of the statement itself; statements have no .type
    raise Exception('Handling for statements of type {} not implemented'.format(type(statement)))
386
def generate_function_body(function_body):
    '''Return the statements of a CFunctionBody, one per line, unindented.'''
    assert isinstance(function_body, CFunctionBody)

    lines = [generate_statement(statement) for statement in function_body.statements]
    return '\n'.join(lines)
390
# Layout of a generated C function definition; $body is expected to be
# indented already.
FUNCTION_DEFINITION_TEMPLATE = string.Template(
'''
$return_type $name($argument_declaration_list)
{
$body
}
'''.strip())

def generate_function_declaration(function_declaration):
    '''Return the full C source of a CFunctionDeclaration, body included.'''
    assert isinstance(function_declaration, CFunctionDeclaration)

    return_type = generate_type(function_declaration.return_type)
    argument_declaration_list = generate_argument_declaration_list(
        function_declaration.argument_declaration_list,
    )
    body = indent(generate_function_body(function_declaration.body))

    return FUNCTION_DEFINITION_TEMPLATE.substitute(
        return_type = return_type,
        name = function_declaration.name,
        argument_declaration_list = argument_declaration_list,
        body = body,
    )
407
# The C runtime every generated program is built on, followed by a main()
# whose body is filled in by generate_program. It must define every runtime
# function the compiler can emit a call to, including
# makeObjectPointerFromInteger, which quote_to_c emits for integer arguments.
PROGRAM_TEMPLATE = string.Template(
'''
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct Object;
typedef struct Object Object;

enum Type
{
    CELL,
    INTEGER,
    STRING
};
typedef enum Type Type;

struct Cell;
typedef struct Cell Cell;
struct Cell
{
    Object* left;
    Object* right;
};

union Instance
{
    Cell cell;
    int integer;
    char* string;
};
typedef union Instance Instance;

Instance makeInstanceFromCell(Cell cell)
{
    Instance result;
    result.cell = cell;
    return result;
}

Instance makeInstanceFromInteger(int integer)
{
    Instance result;
    result.integer = integer;
    return result;
}

Instance makeInstanceFromString(char* string)
{
    Instance result;
    result.string = string;
    return result;
}

struct Object
{
    Type type;
    Instance instance;
};

Object makeObject(Type t, Instance i)
{
    Object result;
    result.type = t;
    result.instance = i;
    return result;
}

Object makeObjectFromCell(Cell cell)
{
    return makeObject(CELL, makeInstanceFromCell(cell));
}

Object makeObjectFromInteger(int integer)
{
    return makeObject(INTEGER, makeInstanceFromInteger(integer));
}

Object makeObjectFromString(char* string)
{
    return makeObject(STRING, makeInstanceFromString(string));
}

Object* makeObjectPointerFromObject(Object o)
{
    Object* result = malloc(sizeof(Object));
    *result = o;
    return result;
}

Object* makeObjectPointerFromCell(Cell cell)
{
    return makeObjectPointerFromObject(makeObjectFromCell(cell));
}

Object* makeObjectPointerFromInteger(int integer)
{
    return makeObjectPointerFromObject(makeObjectFromInteger(integer));
}

Object* makeObjectPointerFromString(char* string)
{
    return makeObjectPointerFromObject(makeObjectFromString(string));
}

Cell makeCell(Object* left, Object* right)
{
    Cell result;
    result.left = left;
    result.right = right;
    return result;
}

Object* c_cons(Object* left, Object* right)
{
    Cell cell = makeCell(left, right);
    return makeObjectPointerFromCell(cell);
}

void c_print(Object* stutter_string)
{
    assert(stutter_string->type == STRING);
    char* c_string = stutter_string->instance.string;
    printf("%s", c_string);
}

int countArgs(Object* args)
{
    if(args == NULL) return 0;

    assert(args->type == CELL);
    return 1 + countArgs(args->instance.cell.right);
}

Object* getArg(int index, Object* args)
{
    if(index == 0) return args->instance.cell.left;

    return getArg(index - 1, args->instance.cell.right);
}

void print(Object* args)
{
    assert(countArgs(args) == 1);
    Object* stutter_string = getArg(0, args);
    c_print(stutter_string);
}

int main(int argc, char** argv)
{
$body
}
'''.strip())

def generate_program(body):
    '''
    Return the complete C program: the runtime from PROGRAM_TEMPLATE with
    body inserted verbatim as the body of main().

    body is expected to be already-indented C source text.
    '''
    return PROGRAM_TEMPLATE.substitute(body = body)
547
if __name__ == '__main__':
    # Command-line entry point: compile the Stutter file named by the first
    # argument and write the resulting C program to standard output.
    import sys

    if len(sys.argv) < 2:
        # Fail with usage help instead of an IndexError traceback
        sys.stderr.write('Usage: python stutter.py stutter_code.stt > c_code.c\n')
        sys.exit(1)

    source_file_name = sys.argv[1]

    with open(source_file_name, 'r') as source_file:
        source = source_file.read()

    result = generate_program(
        indent(generate_function_body(evaluate_all_to_c(parse_all(source)))),
    )

    print(result)