X-Git-Url: https://code.kerkeslager.com/?a=blobdiff_plain;f=tokenization.py;h=e6cad0a0bc7c3ac035294f6d0720c5e46a0a56e4;hb=59cb91d6dbee0b40416ff265565b426958770d45;hp=0421b84592fa58280c8f84c105870fd197f65c0e;hpb=fd918259dd949c8fababcf49ced426ab3c39da38;p=fur

diff --git a/tokenization.py b/tokenization.py
index 0421b84..e6cad0a 100644
--- a/tokenization.py
+++ b/tokenization.py
@@ -9,6 +9,7 @@ Token = collections.namedtuple(
         'type',
         'match',
         'index',
+        'line',
     ],
 )
 
@@ -16,7 +17,7 @@ def _make_token_matcher(definition):
     name, regex = definition
     regex_matcher = re.compile(regex)
 
-    def token_matcher(index, source):
+    def token_matcher(index, source, line):
         match = regex_matcher.match(source[index:])
 
         if match is None:
@@ -25,7 +26,7 @@ def _make_token_matcher(definition):
         return (
             True,
             index + len(match.group()),
-            Token(type=name, match=match.group(), index=index),
+            Token(type=name, match=match.group(), index=index, line=line),
         )
 
     return token_matcher
@@ -34,11 +35,14 @@ def _make_token_matcher(definition):
 _TOKEN_MATCHERS = [
     ('open_parenthese',                 r'\('),
     ('close_parenthese',                r'\)'),
+    ('comma',                           r','),
     ('integer_literal',                 r'\d+'),
     ('symbol',                          r'[a-z]+'),
     ('single_quoted_string_literal',    r"'.*?'"),
+    ('equality_level_operator',         r'(<=|>=|==|!=|<|>)'),
     ('addition_level_operator',         r'(\+|-)'),
-    ('multiplication_level_operator',  r'(\*|//|%)'),
+    ('multiplication_level_operator',   r'(\*|//|%)'),
+    ('assignment_operator',             r'='),
 ]
 
 _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
@@ -46,6 +50,7 @@ _TOKEN_MATCHERS = list(map(_make_token_matcher, _TOKEN_MATCHERS))
 @util.force_generator(tuple)
 def tokenize(source):
     index = 0
+    line = 1
 
     while index < len(source):
         if source[index] == ' ':
@@ -55,7 +60,7 @@ def tokenize(source):
         success = False
 
         for matcher in _TOKEN_MATCHERS:
-            success, index, token = matcher(index, source)
+            success, index, token = matcher(index, source, line)
 
             if success:
                 yield token
@@ -65,6 +70,7 @@ def tokenize(source):
             raise Exception('Unexpected character "{}"'.format(source[index]))
 
         while index < len(source) and source[index] in set(['\n']):
+            line += 1
             index += 1
 
 if __name__ == '__main__':
@@ -78,6 +84,7 @@ if __name__ == '__main__':
                     type='open_parenthese',
                     match='(',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -88,6 +95,7 @@ if __name__ == '__main__':
                     type='close_parenthese',
                     match=')',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -98,6 +106,7 @@ if __name__ == '__main__':
                     type='symbol',
                     match='print',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -108,6 +117,7 @@ if __name__ == '__main__':
                     type='single_quoted_string_literal',
                     match="'Hello, world'",
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -118,6 +128,7 @@ if __name__ == '__main__':
                     type='addition_level_operator',
                     match='+',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -128,6 +139,7 @@ if __name__ == '__main__':
                     type='addition_level_operator',
                     match='-',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -138,6 +150,7 @@ if __name__ == '__main__':
                     type='multiplication_level_operator',
                     match='*',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -148,6 +161,7 @@ if __name__ == '__main__':
                     type='multiplication_level_operator',
                     match='//',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -158,6 +172,95 @@ if __name__ == '__main__':
                     type='multiplication_level_operator',
                     match='%',
                     index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_comma(self):
+            self.assertEqual(
+                tokenize(','),
+                (Token(
+                    type='comma',
+                    match=',',
+                    index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_assignment_operator(self):
+            self.assertEqual(
+                tokenize('='),
+                (Token(
+                    type='assignment_operator',
+                    match='=',
+                    index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_equality_operator(self):
+            self.assertEqual(
+                tokenize('=='),
+                (Token(
+                    type='equality_level_operator',
+                    match='==',
+                    index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_greater_than_or_equal_operator(self):
+            self.assertEqual(
+                tokenize('>='),
+                (Token(
+                    type='equality_level_operator',
+                    match='>=',
+                    index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_less_than_or_equal_operator(self):
+            self.assertEqual(
+                tokenize('<='),
+                (Token(
+                    type='equality_level_operator',
+                    match='<=',
+                    index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_greater_than_operator(self):  # bare '>' is an equality_level_operator
+            self.assertEqual(
+                tokenize('>'),
+                (Token(
+                    type='equality_level_operator',
+                    match='>',
+                    index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_less_than_operator(self):  # bare '<' is an equality_level_operator
+            self.assertEqual(
+                tokenize('<'),
+                (Token(
+                    type='equality_level_operator',
+                    match='<',
+                    index=0,
+                    line=1,
+                ),),
+            )
+
+        def test_tokenizes_not_equal_operator(self):
+            self.assertEqual(
+                tokenize('!='),
+                (Token(
+                    type='equality_level_operator',
+                    match='!=',
+                    index=0,
+                    line=1,
                 ),),
             )
 
@@ -168,6 +271,7 @@ if __name__ == '__main__':
                     type='symbol',
                     match='print',
                     index=0,
+                    line=1,
                 ),),
             )
 
@@ -178,7 +282,28 @@ if __name__ == '__main__':
                     type='symbol',
                     match='print',
                     index=1,
+                    line=1,
                 ),),
             )
 
+        def test_tokenizes_with_proper_line_numbers(self):
+            self.assertEqual(
+                tokenize('print\n('),
+                (
+                    Token(
+                        type='symbol',
+                        match='print',
+                        index=0,
+                        line=1,
+                    ),
+                    Token(
+                        type='open_parenthese',
+                        match='(',
+                        index=6,
+                        line=2,
+                    ),
+                ),
+            )
+
+
     unittest.main()