From 4ba4fcfbb2712a22a9f3211182c9ec6cee9dd0f8 Mon Sep 17 00:00:00 2001
From: David Kerkeslager
Date: Thu, 3 Aug 2017 15:26:54 -0400
Subject: [PATCH] Added a very rudimentary fur-to-c compiler

---
 examples/01_hello.fur |   1 +
 generation.py         |  38 +++++++++++++++
 main.py               |  22 +++++++++
 parsing.py            |  94 +++++++++++++++++++++++++++++++++++++
 requirements.txt      |   1 +
 templates/program.c   |  18 +++++++
 tokenization.py       | 107 ++++++++++++++++++++++++++++++++++++++++++
 transformation.py     |  71 ++++++++++++++++++++++++++++
 util.py               |  22 +++++++++
 9 files changed, 374 insertions(+)
 create mode 100644 examples/01_hello.fur
 create mode 100644 generation.py
 create mode 100644 main.py
 create mode 100644 parsing.py
 create mode 100644 requirements.txt
 create mode 100644 templates/program.c
 create mode 100644 tokenization.py
 create mode 100644 transformation.py
 create mode 100644 util.py

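Running "python main.py examples/01_hello.fur" from the repository root
(so the Jinja2 FileSystemLoader can find templates/, and with the
dependency from requirements.txt installed) writes examples/01_hello.c
next to the source file. The result can then be built with any C
compiler, e.g. "cc examples/01_hello.c"; note that the $ in
builtin$print relies on an identifier extension that gcc and clang
accept by default.

For reference, the C emitted for the example should look roughly like
the sketch below. This is hand-written from templates/program.c rather
than captured compiler output, so the exact whitespace (which depends on
Jinja2's trim_blocks handling) may differ:

    #include<stdio.h>

    /* Definition emitted because the example calls the 'print' builtin. */
    void builtin$print(const char* output)
    {
      printf("%s\n", output);
    }

    int main(int argc, char** argv)
    {
      builtin$print("Hello, world");
      return 0;
    }
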
diff --git a/examples/01_hello.fur b/examples/01_hello.fur
new file mode 100644
index 0000000..72ee2bc
--- /dev/null
+++ b/examples/01_hello.fur
@@ -0,0 +1 @@
+print('Hello, world')
diff --git a/generation.py b/generation.py
new file mode 100644
index 0000000..67c44a2
--- /dev/null
+++ b/generation.py
@@ -0,0 +1,38 @@
+import jinja2
+
+ENV = jinja2.Environment(
+    autoescape=jinja2.select_autoescape([]),
+    loader=jinja2.FileSystemLoader('templates'),
+    trim_blocks=True,
+)
+
+def generate_argument(c_string_literal):
+    def c_escape(ch):
+        return {
+            '\n': r'\n',
+            '"': r'\"',
+            '\\': r'\\',
+        }.get(ch, ch)
+
+    return '"{}"'.format(
+        ''.join(c_escape(ch) for ch in c_string_literal.value),
+    )
+
+def generate_statement(c_function_call_statement):
+    return '{}({});'.format(
+        c_function_call_statement.name,
+        ', '.join(generate_argument(argument) for argument in c_function_call_statement.arguments),
+    )
+
+def generate(c_program):
+    template = ENV.get_template('program.c')
+    return template.render(
+        builtins=list(sorted(c_program.builtins)),
+        statements=[generate_statement(statement) for statement in c_program.statements],
+        standard_libraries=set(['stdio.h']),
+    )
+
+if __name__ == '__main__':
+    import unittest
+
+    unittest.main()
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..5d9f9de
--- /dev/null
+++ b/main.py
@@ -0,0 +1,22 @@
+import sys
+
+import generation
+import parsing
+import tokenization
+import transformation
+
+source_path = sys.argv[1]
+
+with open(source_path, 'r') as f:
+    source = f.read()
+
+tokens = tokenization.tokenize(source)
+parsed = parsing.parse(tokens)
+transformed = transformation.transform(parsed)
+generated = generation.generate(transformed)
+
+assert source_path.endswith('.fur')
+destination_path = source_path[:-4] + '.c'
+
+with open(destination_path, 'w') as f:
+    f.write(generated)
diff --git a/parsing.py b/parsing.py
new file mode 100644
index 0000000..62ac92f
--- /dev/null
+++ b/parsing.py
@@ -0,0 +1,94 @@
+import collections
+
+StringLiteral = collections.namedtuple(
+    'StringLiteral',
+    [
+        'value',
+    ],
+)
+
+def _string_literal_parser(index, tokens):
+    failure = (False, index, None)
+
+    if tokens[index].type != 'single_quoted_string_literal':
+        return failure
+    value = tokens[index].match[1:-1]
+    index += 1
+
+    return True, index, StringLiteral(value=value)
+
+
+FunctionCall = collections.namedtuple(
+    'FunctionCall',
+    [
+        'name',
+        'arguments',
+    ],
+)
+
+def _function_call_parser(index, tokens):
+    failure = (False, index, None)
+
+    if tokens[index].type != 'symbol':
+        return failure
+    name = tokens[index].match
+    index += 1
+
+    if tokens[index].type != 'open_parenthese':
+        return failure
+    index += 1
+
+    success, index, argument = _string_literal_parser(index, tokens)
+
+    if not success:
+        return failure
+
+    if tokens[index].type != 'close_parenthese':
+        return failure
+    index += 1
+
+    return True, index, FunctionCall(name=name, arguments=(argument,))
+
+def _parse(parser, tokens):
+    success, index, result = parser(0, tokens)
+
+    if success:
+        return result
+
+    raise Exception('Unable to parse')
+
+
+def parse(tokens):
+    return _parse(_function_call_parser, tokens)
+
+if __name__ == '__main__':
+    import unittest
+
+    import tokenization
+
+    class StringLiteralParserTests(unittest.TestCase):
+        def test_parses_single_quoted_string_literal(self):
+            self.assertEqual(
+                _string_literal_parser(0, tokenization.tokenize("'Hello, world'")),
+                (
+                    True,
+                    1,
+                    StringLiteral(value='Hello, world'),
+                ),
+            )
+
+    class FunctionCallParserTests(unittest.TestCase):
+        def test_parses_function_with_string_literal_argument(self):
+            self.assertEqual(
+                _function_call_parser(0, tokenization.tokenize("print('Hello, world')")),
+                (
+                    True,
+                    4,
+                    FunctionCall(
+                        name='print',
+                        arguments=(StringLiteral(value='Hello, world'),),
+                    ),
+                ),
+            )
+
+    unittest.main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a7bcc47
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+Jinja2==2.9.6
diff --git a/templates/program.c b/templates/program.c
new file mode 100644
index 0000000..3d15076
--- /dev/null
+++ b/templates/program.c
@@ -0,0 +1,18 @@
+{% for standard_library in standard_libraries %}
+#include<{{standard_library}}>
+{% endfor %}
+
+{% if 'print' in builtins %}
+void builtin$print(const char* output)
+{
+  printf("%s\n", output);
+}
+{% endif %}
+
+int main(int argc, char** argv)
+{
+  {% for statement in statements %}
+  {{ statement }}
+  {% endfor %}
+  return 0;
+}
diff --git a/tokenization.py b/tokenization.py
new file mode 100644
index 0000000..c1b30df
--- /dev/null
+++ b/tokenization.py
@@ -0,0 +1,107 @@
+import collections
+import re
+
+import util
+
+Token = collections.namedtuple(
+    'Token',
+    [
+        'type',
+        'match',
+    ],
+)
+
+def _make_token_matcher(definition):
+    name, regex = definition
+    regex_matcher = re.compile(regex)
+
+    def token_matcher(index, source):
+        match = regex_matcher.match(source[index:])
+
+        if match is None:
+            return False, index, None
+
+        return True, index + len(match.group()), Token(type=name, match=match.group())
+
+    return token_matcher
+
+
+_TOKEN_MATCHERS = [
+    ('open_parenthese', r'\('),
+    ('close_parenthese', r'\)'),
+    ('symbol', r'[a-z]+'),
+    ('single_quoted_string_literal', r"'.*?'"),
+]
+
+_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
+
+@util.force_generator
+def tokenize(source):
+    index = 0
+
+    while index < len(source):
+        success = False
+
+        for matcher in _TOKEN_MATCHERS:
+            success, index, token = matcher(index, source)
+
+            if success:
+                yield token
+                break
+
+        if not success:
+            raise Exception('Unexpected character "{}"'.format(source[index]))
+
+        while index < len(source) and source[index] in set(['\n']):
+            index += 1
+
+if __name__ == '__main__':
+    import unittest
+
+    class TokenizeTests(unittest.TestCase):
+        def test_tokenizes_open_parenthese(self):
+            self.assertEqual(
+                tokenize('('),
+                [Token(
+                    type='open_parenthese',
+                    match='(',
+                )],
+            )
+
+        def test_tokenizes_close_parenthese(self):
+            self.assertEqual(
+                tokenize(')'),
+                [Token(
+                    type='close_parenthese',
+                    match=')',
+                )],
+            )
+
+        def test_tokenizes_symbol(self):
+            self.assertEqual(
+                tokenize('print'),
+                [Token(
                    type='symbol',
                    match='print',
                )],
            )

        def test_tokenizes_single_quoted_string_literal(self):
            self.assertEqual(
                tokenize("'Hello, world'"),
                [Token(
                    type='single_quoted_string_literal',
                    match="'Hello, world'",
                )],
            )

        def test_handles_trailing_newline(self):
            self.assertEqual(
                tokenize('print\n'),
                [Token(
                    type='symbol',
                    match='print',
                )],
            )

    unittest.main()
diff --git a/transformation.py b/transformation.py
new file mode 100644
index 0000000..9ba0c0a
--- /dev/null
+++ b/transformation.py
@@ -0,0 +1,71 @@
+import collections
+
+import parsing
+
+CStringLiteral = collections.namedtuple(
+    'CStringLiteral',
+    [
+        'value',
+    ],
+)
+
+CFunctionCallStatement = collections.namedtuple(
+    'CFunctionCallStatement',
+    [
+        'name',
+        'arguments',
+    ],
+)
+
+CProgram = collections.namedtuple(
+    'CProgram',
+    [
+        'builtins',
+        'statements',
+        'standard_libraries',
+    ],
+)
+
+BUILTINS = {
+    'print': ['stdio.h'],
+}
+
+def transform_argument(builtin_dependencies, argument):
+    if isinstance(argument, parsing.StringLiteral):
+        return CStringLiteral(value=argument.value)
+
+    raise Exception()
+
+def transform_function_call_statement(builtin_dependencies, function_call):
+    if function_call.name in BUILTINS.keys():
+        builtin_dependencies.add(function_call.name)
+
+        return CFunctionCallStatement(
+            name='builtin$' + function_call.name,
+            arguments=tuple(transform_argument(builtin_dependencies, arg) for arg in function_call.arguments),
+        )
+
+    raise Exception()
+
+
+def transform(function_call):
+    builtins = set()
+
+    statement = transform_function_call_statement(builtins, function_call)
+
+    standard_libraries = set()
+    for builtin in builtins:
+        for standard_library in BUILTINS[builtin]:
+            standard_libraries.add(standard_library)
+
+    return CProgram(
+        builtins=builtins,
+        statements=[statement],
+        standard_libraries=standard_libraries,
+    )
+
+
+if __name__ == '__main__':
+    import unittest
+
+    unittest.main()
diff --git a/util.py b/util.py
new file mode 100644
index 0000000..d73990d
--- /dev/null
+++ b/util.py
@@ -0,0 +1,22 @@
+import functools
+
+def force_generator(generator_function):
+    @functools.wraps(generator_function)
+    def forced_generator(*args, **kwargs):
+        return list(generator_function(*args, **kwargs))
+
+    return forced_generator
+
+if __name__ == '__main__':
+    import unittest
+
+    class ForceGeneratorTests(unittest.TestCase):
+        def test_forces_generator(self):
+            forced_range = force_generator(range)
+
+            self.assertEqual(
+                forced_range(10),
+                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+            )
+
+    unittest.main()
-- 
2.20.1