Add basic math
[fur] / tokenization.py
1 import collections
2 import re
3
4 import util
5
# Token: one lexical unit produced by tokenize().
#   type  -- name of the matcher that produced it, e.g. 'symbol'
#   match -- the exact source text consumed
#   index -- character offset of the match within the source string
Token = collections.namedtuple('Token', 'type match index')
14
def _make_token_matcher(definition):
    """Compile a (name, regex) pair into a matcher function.

    The returned function has the signature matcher(index, source) and
    returns a (success, new_index, token) triple: on a match at index,
    success is True, new_index points just past the matched text, and
    token is a Token; otherwise (False, index, None).
    """
    token_type, pattern = definition
    compiled = re.compile(pattern)

    def matcher(index, source):
        result = compiled.match(source[index:])

        if result is None:
            return False, index, None

        lexeme = result.group()
        token = Token(type=token_type, match=lexeme, index=index)
        return True, index + len(lexeme), token

    return matcher
32
33
# Ordered (token type, regex) definitions. Order matters: matchers are
# tried first-to-last, so earlier entries win when two patterns could
# match at the same position.
_TOKEN_MATCHER_DEFINITIONS = (
    ('open_parenthese',                 r'\('),
    ('close_parenthese',                r'\)'),
    ('integer_literal',                 r'\d+'),
    ('symbol',                          r'[a-z]+'),
    ('single_quoted_string_literal',    r"'.*?'"),
    ('addition_level_operator',         r'(\+|-)'),
    ('multiplication_level_operator',   r'(\*|//|%)'),
)

# Compiled matcher functions, in the same priority order. Kept under the
# original name so tokenize() below is unaffected; a distinct name for the
# raw definitions avoids rebinding one name to two different value kinds.
_TOKEN_MATCHERS = [
    _make_token_matcher(definition)
    for definition in _TOKEN_MATCHER_DEFINITIONS
]
45
@util.force_generator(tuple)
def tokenize(source):
    """Yield Token tuples for each lexical unit in *source*.

    Spaces and newlines between tokens are skipped. Raises Exception on
    the first character no matcher can consume.

    Fixes over the previous version: newlines were only consumed *after*
    a successful token, so a leading (or doubled) newline crashed with
    "Unexpected character"; whitespace handling was also split across two
    places, and a throwaway set(['\n']) was rebuilt every iteration.
    """
    index = 0

    while index < len(source):
        # Skip insignificant whitespace wherever it appears.
        if source[index] in ' \n':
            index += 1
            continue

        for matcher in _TOKEN_MATCHERS:
            success, index, token = matcher(index, source)

            if success:
                yield token
                break
        else:
            # No matcher consumed anything at this position.
            raise Exception('Unexpected character "{}"'.format(source[index]))
69
if __name__ == '__main__':
    import unittest

    class TokenizeTests(unittest.TestCase):
        # Helper (not collected by unittest -- name does not start with
        # 'test'): asserts that tokenizing `source` yields exactly one
        # token with the given fields.
        def assert_single_token(self, source, type, match, index=0):
            self.assertEqual(
                tokenize(source),
                (Token(type=type, match=match, index=index),),
            )

        def test_tokenizes_open_parenthese(self):
            self.assert_single_token('(', 'open_parenthese', '(')

        def test_tokenizes_close_parenthese(self):
            self.assert_single_token(')', 'close_parenthese', ')')

        # New: integer_literal had a matcher but no test.
        def test_tokenizes_integer_literal(self):
            self.assert_single_token('42', 'integer_literal', '42')

        def test_tokenizes_symbol(self):
            self.assert_single_token('print', 'symbol', 'print')

        def test_tokenizes_single_quoted_string_literal(self):
            self.assert_single_token(
                "'Hello, world'",
                'single_quoted_string_literal',
                "'Hello, world'",
            )

        def test_tokenizes_plus(self):
            self.assert_single_token('+', 'addition_level_operator', '+')

        def test_tokenizes_minus(self):
            self.assert_single_token('-', 'addition_level_operator', '-')

        def test_tokenizes_times(self):
            self.assert_single_token('*', 'multiplication_level_operator', '*')

        def test_tokenizes_integer_divide(self):
            self.assert_single_token('//', 'multiplication_level_operator', '//')

        def test_tokenizes_modular_divide(self):
            self.assert_single_token('%', 'multiplication_level_operator', '%')

        def test_handles_trailing_newline(self):
            self.assert_single_token('print\n', 'symbol', 'print')

        def test_handles_leading_space(self):
            self.assert_single_token(' print', 'symbol', 'print', index=1)

    unittest.main()