Added a very rudimentary fur-to-c compiler

author David Kerkeslager <kerkeslager@gmail.com>

Thu, 3 Aug 2017 19:26:54 +0000 (15:26 -0400)

committer David Kerkeslager <kerkeslager@gmail.com>

Thu, 3 Aug 2017 19:26:54 +0000 (15:26 -0400)
author David Kerkeslager <kerkeslager@gmail.com>
Thu, 3 Aug 2017 19:26:54 +0000 (15:26 -0400)
committer David Kerkeslager <kerkeslager@gmail.com>
Thu, 3 Aug 2017 19:26:54 +0000 (15:26 -0400)
diff --git a/examples/01_hello.fur b/examples/01_hello.fur

new file mode 100644 (file)

index 0000000..72ee2bc
--- /dev/null
+++ b/examples/01_hello.fur
@@ -0,0 +1 @@
+print('Hello, world')
diff --git a/generation.py b/generation.py

new file mode 100644 (file)

index 0000000..67c44a2
--- /dev/null
+++ b/generation.py
@@ -0,0 +1,62 @@
+import jinja2
+
+ENV = jinja2.Environment(
+    autoescape=jinja2.select_autoescape([]),
+    loader=jinja2.FileSystemLoader('templates'),
+    trim_blocks=True,
+)
+
+# Characters that cannot appear verbatim inside a C string literal, mapped to
+# the escape sequence that replaces them.
+_C_ESCAPES = {
+    '\n': r'\n',
+    '"': r'\"',
+    '\\': r'\\',
+}
+
+def generate_argument(c_string_literal):
+    """Render a string-literal argument as a double-quoted C string literal.
+
+    `c_string_literal` is any object with a `value` string attribute. Each
+    character is escaped individually: newlines, double quotes and backslashes
+    are replaced by their C escape sequences; everything else is copied as-is.
+    """
+    return '"{}"'.format(
+        ''.join(_C_ESCAPES.get(ch, ch) for ch in c_string_literal.value),
+    )
+
+def generate_statement(c_function_call_statement):
+    """Render a function-call statement as `name(arg, ...);`."""
+    return '{}({});'.format(
+        c_function_call_statement.name,
+        ', '.join(generate_argument(argument) for argument in c_function_call_statement.arguments),
+    )
+
+def generate(c_program):
+    """Render `c_program` to C source text using the `program.c` template.
+
+    The builtins are passed to the template as a sorted list.
+    """
+    template = ENV.get_template('program.c')
+    return template.render(
+        builtins=sorted(c_program.builtins),
+        statements=[generate_statement(statement) for statement in c_program.statements],
+        # NOTE(review): c_program.standard_libraries is not consulted here; the
+        # include list is fixed to stdio.h.
+        standard_libraries=set(['stdio.h']),
+    )
+
+if __name__ == '__main__':
+    import collections
+    import unittest
+
+    class GenerateArgumentTests(unittest.TestCase):
+        def test_escapes_special_characters(self):
+            literal = collections.namedtuple('Literal', ['value'])('a"b\\c\nd')
+
+            self.assertEqual(
+                generate_argument(literal),
+                '"a\\"b\\\\c\\nd"',
+            )
+
+    unittest.main()
diff --git a/main.py b/main.py

new file mode 100644 (file)

index 0000000..5d9f9de
--- /dev/null
+++ b/main.py
@@ -0,0 +1,37 @@
+# Command-line entry point: compile one .fur source file to a .c file that is
+# written next to it.
+#
+# Usage: python main.py path/to/program.fur
+import sys
+
+import generation
+import parsing
+import tokenization
+import transformation
+
+FUR_EXTENSION = '.fur'
+
+if len(sys.argv) < 2:
+    sys.exit('Usage: {} SOURCE.fur'.format(sys.argv[0]))
+
+source_path = sys.argv[1]
+
+# Check the extension before doing any work. An explicit check (rather than an
+# assert) still runs under `python -O`, so the slice below can never strip
+# characters that are not the extension.
+if not source_path.endswith(FUR_EXTENSION):
+    sys.exit('Expected a path ending in "{}", got "{}"'.format(FUR_EXTENSION, source_path))
+
+destination_path = source_path[:-len(FUR_EXTENSION)] + '.c'
+
+with open(source_path, 'r') as f:
+    source = f.read()
+
+# The compiler pipeline: text -> tokens -> parse tree -> C program -> C source.
+tokens = tokenization.tokenize(source)
+parsed = parsing.parse(tokens)
+transformed = transformation.transform(parsed)
+generated = generation.generate(transformed)
+
+with open(destination_path, 'w') as f:
+    f.write(generated)
diff --git a/parsing.py b/parsing.py

new file mode 100644 (file)

index 0000000..62ac92f
--- /dev/null
+++ b/parsing.py
@@ -0,0 +1,133 @@
+import collections
+
+StringLiteral = collections.namedtuple(
+    'StringLiteral',
+    [
+        'value',
+    ],
+)
+
+def _token_has_type(index, tokens, token_type):
+    """Return whether `tokens[index]` exists and has type `token_type`.
+
+    Running off the end of the token list counts as "no match", so parsers
+    report an ordinary failure on truncated input instead of raising
+    IndexError.
+    """
+    return index < len(tokens) and tokens[index].type == token_type
+
+def _string_literal_parser(index, tokens):
+    """Parse a single-quoted string literal starting at `tokens[index]`.
+
+    Returns `(True, next_index, StringLiteral)` on success and
+    `(False, index, None)` on failure.
+    """
+    if not _token_has_type(index, tokens, 'single_quoted_string_literal'):
+        return False, index, None
+
+    # Drop the surrounding quote characters from the matched text.
+    value = tokens[index].match[1:-1]
+
+    return True, index + 1, StringLiteral(value=value)
+
+
+FunctionCall = collections.namedtuple(
+    'FunctionCall',
+    [
+        'name',
+        'arguments',
+    ],
+)
+
+def _function_call_parser(index, tokens):
+    """Parse `symbol ( string_literal )` starting at `tokens[index]`.
+
+    Returns `(True, next_index, FunctionCall)` on success. On failure returns
+    `(False, index, None)` with the index the parser was called with, so no
+    tokens are consumed.
+    """
+    failure = (False, index, None)
+
+    if not _token_has_type(index, tokens, 'symbol'):
+        return failure
+    name = tokens[index].match
+    index += 1
+
+    if not _token_has_type(index, tokens, 'open_parenthese'):
+        return failure
+    index += 1
+
+    success, index, argument = _string_literal_parser(index, tokens)
+
+    if not success:
+        return failure
+
+    if not _token_has_type(index, tokens, 'close_parenthese'):
+        return failure
+    index += 1
+
+    return True, index, FunctionCall(name=name, arguments=(argument,))
+
+def _parse(parser, tokens):
+    """Run `parser` from the first token and return its result.
+
+    Raises Exception if the parser fails.
+
+    NOTE(review): the index returned by the parser is not checked, so any
+    tokens after the parsed construct are ignored.
+    """
+    success, index, result = parser(0, tokens)
+
+    if success:
+        return result
+
+    raise Exception('Unable to parse')
+
+
+def parse(tokens):
+    """Parse `tokens` as a single function call and return a FunctionCall."""
+    return _parse(_function_call_parser, tokens)
+
+if __name__ == '__main__':
+    import unittest
+
+    import tokenization
+
+    class StringLiteralParserTests(unittest.TestCase):
+        def test_parses_single_quoted_string_literal(self):
+            self.assertEqual(
+                _string_literal_parser(0, tokenization.tokenize("'Hello, world'")),
+                (
+                    True,
+                    1,
+                    StringLiteral(value='Hello, world'),
+                ),
+            )
+
+        def test_fails_cleanly_at_end_of_tokens(self):
+            self.assertEqual(
+                _string_literal_parser(0, []),
+                (False, 0, None),
+            )
+
+    class FunctionCallParserTests(unittest.TestCase):
+        def test_parses_function_with_string_literal_argument(self):
+            self.assertEqual(
+                _function_call_parser(0, tokenization.tokenize("print('Hello, world')")),
+                (
+                    True,
+                    4,
+                    FunctionCall(
+                        name='print',
+                        arguments=(StringLiteral(value='Hello, world'),),
+                    ),
+                ),
+            )
+
+        def test_fails_cleanly_on_truncated_call(self):
+            self.assertEqual(
+                _function_call_parser(0, tokenization.tokenize("print('Hello, world'")),
+                (False, 0, None),
+            )
+
+    unittest.main()
diff --git a/requirements.txt b/requirements.txt

new file mode 100644 (file)

index 0000000..a7bcc47
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+Jinja2==2.9.6
diff --git a/templates/program.c b/templates/program.c

new file mode 100644 (file)

index 0000000..3d15076
--- /dev/null
+++ b/templates/program.c
@@ -0,0 +1,18 @@
+{% for standard_library in standard_libraries %}
+#include<{{standard_library}}>
+{% endfor %}
+
+{% if 'print' in builtins %}
+void builtin$print(const char* output)
+{
+  printf("%s\n", output);
+}
+{% endif %}
+
+int main(int argc, char** argv)
+{
+  {% for statement in statements %}
+  {{ statement }}
+  {% endfor %}
+  return 0;
+}
diff --git a/tokenization.py b/tokenization.py

new file mode 100644 (file)

index 0000000..c1b30df
--- /dev/null
+++ b/tokenization.py
@@ -0,0 +1,131 @@
+import collections
+import re
+
+import util
+
+Token = collections.namedtuple(
+    'Token',
+    [
+        'type',
+        'match',
+    ],
+)
+
+def _make_token_matcher(definition):
+    """Build a matcher function from a `(name, regex)` token definition.
+
+    The returned matcher takes `(index, source)` and tries to match the regex
+    at `source[index]`. It returns `(True, end_index, Token)` on a match and
+    `(False, index, None)` otherwise.
+    """
+    name, regex = definition
+    regex_matcher = re.compile(regex)
+
+    def token_matcher(index, source):
+        # Match in place at `index` rather than slicing `source`, which would
+        # copy the remainder of the input on every attempt.
+        match = regex_matcher.match(source, index)
+
+        if match is None:
+            return False, index, None
+
+        return True, match.end(), Token(type=name, match=match.group())
+
+    return token_matcher
+
+
+# Token definitions, tried in order at each position.
+_TOKEN_MATCHERS = [
+    ('open_parenthese',                 r'\('),
+    ('close_parenthese',                r'\)'),
+    ('symbol',                          r'[a-z]+'),
+    ('single_quoted_string_literal',    r"'.*?'"),
+]
+
+_TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
+
+# Characters skipped between tokens.
+_WHITESPACE = frozenset(['\n'])
+
+@util.force_generator
+def tokenize(source):
+    """Split `source` into a list of Tokens.
+
+    Newlines before, between and after tokens are skipped. Raises Exception
+    at the first character where no token definition matches.
+    """
+    index = 0
+
+    while index < len(source):
+        if source[index] in _WHITESPACE:
+            index += 1
+            continue
+
+        for matcher in _TOKEN_MATCHERS:
+            success, index, token = matcher(index, source)
+
+            if success:
+                yield token
+                break
+        else:
+            raise Exception('Unexpected character "{}"'.format(source[index]))
+
+if __name__ == '__main__':
+    import unittest
+
+    class TokenizeTests(unittest.TestCase):
+        def test_tokenizes_open_parenthese(self):
+            self.assertEqual(
+                tokenize('('),
+                [Token(
+                    type='open_parenthese',
+                    match='(',
+                )],
+            )
+
+        def test_tokenizes_close_parenthese(self):
+            self.assertEqual(
+                tokenize(')'),
+                [Token(
+                    type='close_parenthese',
+                    match=')',
+                )],
+            )
+
+        def test_tokenizes_symbol(self):
+            self.assertEqual(
+                tokenize('print'),
+                [Token(
+                    type='symbol',
+                    match='print',
+                )],
+            )
+
+        def test_tokenizes_single_quoted_string_literal(self):
+            self.assertEqual(
+                tokenize("'Hello, world'"),
+                [Token(
+                    type='single_quoted_string_literal',
+                    match="'Hello, world'",
+                )],
+            )
+
+        def test_handles_trailing_newline(self):
+            self.assertEqual(
+                tokenize('print\n'),
+                [Token(
+                    type='symbol',
+                    match='print',
+                )],
+            )
+
+        def test_handles_leading_newline(self):
+            self.assertEqual(
+                tokenize('\nprint'),
+                [Token(
+                    type='symbol',
+                    match='print',
+                )],
+            )
+
+    unittest.main()
diff --git a/transformation.py b/transformation.py

new file mode 100644 (file)

index 0000000..9ba0c0a
--- /dev/null
+++ b/transformation.py
@@ -0,0 +1,88 @@
+import collections
+
+import parsing
+
+CStringLiteral = collections.namedtuple(
+    'CStringLiteral',
+    [
+        'value',
+    ],
+)
+
+CFunctionCallStatement = collections.namedtuple(
+    'CFunctionCallStatement',
+    [
+        'name',
+        'arguments',
+    ],
+)
+
+CProgram = collections.namedtuple(
+    'CProgram',
+    [
+        'builtins',
+        'statements',
+        'standard_libraries',
+    ],
+)
+
+# Maps each builtin function name to the C standard library headers that its
+# generated implementation needs.
+BUILTINS = {
+    'print': ['stdio.h'],
+}
+
+def transform_argument(builtin_dependencies, argument):
+    """Convert a parsed argument into its C counterpart.
+
+    Only string literals are supported; any other argument raises Exception.
+    `builtin_dependencies` is accepted but not used here.
+    """
+    if isinstance(argument, parsing.StringLiteral):
+        return CStringLiteral(value=argument.value)
+
+    raise Exception('Unsupported argument: {!r}'.format(argument))
+
+def transform_function_call_statement(builtin_dependencies, function_call):
+    """Convert a parsed call to a builtin into a CFunctionCallStatement.
+
+    The builtin's name is added to the `builtin_dependencies` set, and the
+    emitted C function name is the builtin name prefixed with `builtin$`.
+    Raises Exception if the called function is not a builtin.
+    """
+    if function_call.name in BUILTINS:
+        builtin_dependencies.add(function_call.name)
+
+        return CFunctionCallStatement(
+            name='builtin$' + function_call.name,
+            arguments=tuple(transform_argument(builtin_dependencies, arg) for arg in function_call.arguments),
+        )
+
+    raise Exception('Unknown function: {!r}'.format(function_call.name))
+
+
+def transform(function_call):
+    """Build a CProgram consisting of the single call `function_call`.
+
+    The program records which builtins the call uses and the union of the
+    standard library headers those builtins need.
+    """
+    builtins = set()
+
+    statement = transform_function_call_statement(builtins, function_call)
+
+    standard_libraries = set()
+    for builtin in builtins:
+        standard_libraries.update(BUILTINS[builtin])
+
+    return CProgram(
+        builtins=builtins,
+        statements=[statement],
+        standard_libraries=standard_libraries,
+    )
+
+
+if __name__ == '__main__':
+    import unittest
+
+    unittest.main()
diff --git a/util.py b/util.py

new file mode 100644 (file)

index 0000000..d73990d
--- /dev/null
+++ b/util.py
@@ -0,0 +1,28 @@
+import functools
+
+def force_generator(generator_function):
+    """Decorate `generator_function` so that calls return a list.
+
+    The wrapped callable is invoked with the caller's arguments and whatever
+    it returns is consumed eagerly into a list.
+    """
+    @functools.wraps(generator_function)
+    def collect(*args, **kwargs):
+        produced = generator_function(*args, **kwargs)
+        return list(produced)
+
+    return collect
+
+if __name__ == '__main__':
+    import unittest
+
+    class ForceGeneratorTests(unittest.TestCase):
+        def test_forces_generator(self):
+            forced_range = force_generator(range)
+
+            self.assertEqual(
+                forced_range(10),
+                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
+            )
+
+    unittest.main()
author	David Kerkeslager <kerkeslager@gmail.com>
	Thu, 3 Aug 2017 19:26:54 +0000 (15:26 -0400)
committer	David Kerkeslager <kerkeslager@gmail.com>
	Thu, 3 Aug 2017 19:26:54 +0000 (15:26 -0400)
examples/01_hello.fur	[new file with mode: 0644]	patch \| blob
generation.py	[new file with mode: 0644]	patch \| blob
main.py	[new file with mode: 0644]	patch \| blob
parsing.py	[new file with mode: 0644]	patch \| blob
requirements.txt	[new file with mode: 0644]	patch \| blob
templates/program.c	[new file with mode: 0644]	patch \| blob
tokenization.py	[new file with mode: 0644]	patch \| blob
transformation.py	[new file with mode: 0644]	patch \| blob
util.py	[new file with mode: 0644]	patch \| blob