Use autotag in don.string
[ton] / don / string.py
index ee925f6..796cd07 100644 (file)
@@ -1,4 +1,6 @@
 import binascii
+import collections
+import functools
 import re
 
 from don import tags, _shared
@@ -55,12 +57,21 @@ _STRING_SERIALIZERS = {
 }
 
 def serialize(o):
-    o = tags._tag(o)
+    o = tags.autotag(o)
     
     return _STRING_SERIALIZERS[o.tag](o.value)
 
-def _make_constant_parser(constant, value):
+def _consume_leading_whitespace(wrapped_parser):
+    """Decorate a parser so leading whitespace is stripped before it runs.
+
+    Returns a function that calls `s.lstrip()` on its input and passes the
+    result to `wrapped_parser`, returning whatever `wrapped_parser` returns.
+    """
+    # functools.wraps copies wrapped_parser's metadata (such as __name__) onto the wrapper
+    @functools.wraps(wrapped_parser)
     def parser(s):
+        s = s.lstrip()
+        return wrapped_parser(s)
+
+    return parser
+
+def _make_constant_parser(constant, value):
+    @_consume_leading_whitespace
+    def constant_parser(s):
         if s.startswith(constant):
             result = _shared.ParseResult(
                 success = True,
@@ -71,12 +82,13 @@ def _make_constant_parser(constant, value):
 
         return _shared._FAILED_PARSE_RESULT
 
-    return parser
+    return constant_parser
 
 def _make_integer_parser(width):
     matcher = re.compile(r'(-?\d+)i' + str(width))
 
-    def parser(s):
+    @_consume_leading_whitespace
+    def integer_parser(s):
         match = matcher.match(s)
 
         if match:
@@ -89,16 +101,17 @@ def _make_integer_parser(width):
 
         return _shared._FAILED_PARSE_RESULT
 
-    return parser
+    return integer_parser
 
 _BINARY32_MATCHER = re.compile(r'(-?\d+\.\d+)f')
 _BINARY64_MATCHER = re.compile(r'(-?\d+\.\d+)d')
 
+@_consume_leading_whitespace
 def _binary32_parser(s):
     match = _BINARY32_MATCHER.match(s)
 
     if match:
-        # TODO Validate that the double is in range
+        # TODO Validate that the float is in range
         return _shared.ParseResult(
             success = True,
             value = float(match.group(1)),
@@ -107,6 +120,7 @@ def _binary32_parser(s):
 
     return _shared._FAILED_PARSE_RESULT
 
+@_consume_leading_whitespace
 def _binary64_parser(s):
     match = _BINARY64_MATCHER.match(s)
 
@@ -122,11 +136,11 @@ def _binary64_parser(s):
 
 _BINARY_MATCHER = re.compile(r'"([\da-f]*)"b')
 
+@_consume_leading_whitespace
 def _binary_parser(s):
     match = _BINARY_MATCHER.match(s)
 
     if match:
-        # TODO Validate that the double is in range
         return _shared.ParseResult(
             success = True,
             value = binascii.unhexlify(match.group(1)),
@@ -135,6 +149,132 @@ def _binary_parser(s):
 
     return _shared._FAILED_PARSE_RESULT
 
+def _make_utf_parser(encoding):
+    """Build a parser for a double-quoted string literal suffixed with `encoding`.
+
+    The returned parser skips leading whitespace and then expects `"..."`
+    immediately followed by `encoding` (for example `"abc"utf8`). On a match it
+    returns a _shared.ParseResult whose value is the text between the quotes
+    and whose remaining is the input after the suffix; otherwise it returns
+    _shared._FAILED_PARSE_RESULT.
+    """
+    # The body is matched with [^"]* instead of a lazy .*? so a literal always
+    # ends at its own closing quote. A lazy match keeps extending until it finds
+    # the wanted suffix, so '"a"utf16, "b"utf8' was accepted by the utf8 parser
+    # as the single string 'a"utf16, "b'.
+    # NOTE(review): assumes serialized strings never contain a raw double quote
+    # -- confirm against the string serializers.
+    # re.escape keeps the suffix literal even if it contains regex metacharacters.
+    matcher = re.compile(r'"([^"]*)"' + re.escape(encoding))
+
+    @_consume_leading_whitespace
+    def utf_parser(s):
+        match = matcher.match(s)
+
+        if match:
+            return _shared.ParseResult(
+                success = True,
+                value = match.group(1),
+                remaining = s[match.end():],
+            )
+
+        return _shared._FAILED_PARSE_RESULT
+
+    return utf_parser
+
+def _make_consume_constant_parser(constant):
+    """Build a parser that consumes the literal `constant` and yields no value.
+
+    The returned parser skips leading whitespace; if the input then starts with
+    `constant` it succeeds with value None and the rest of the input as
+    remaining, otherwise it returns _shared._FAILED_PARSE_RESULT.
+    """
+    @_consume_leading_whitespace
+    def consume_character_parser(s):
+        # Guard clause: bail out first, so the success path reads straight through.
+        if not s.startswith(constant):
+            return _shared._FAILED_PARSE_RESULT
+
+        rest = s[len(constant):]
+        return _shared.ParseResult(success = True, value = None, remaining = rest)
+
+    return consume_character_parser
+
+# Consumes a single ',' (after skipping leading whitespace) and produces no value.
+_consume_comma_parser = _make_consume_constant_parser(',')
+
+def _prefix_with_comma(parser):
+    """Wrap `parser` so that it only runs after a leading comma is consumed.
+
+    The wrapped parser returns _shared._FAILED_PARSE_RESULT when no comma is
+    present. Once a comma has been consumed, `parser` must succeed on what
+    follows; if it does not, an Exception describing the trailing comma is raised.
+    """
+    def wrapped(s):
+        comma_result = _consume_comma_parser(s)
+        if not comma_result.success:
+            return _shared._FAILED_PARSE_RESULT
+
+        after_comma = comma_result.remaining
+        item_result = parser(after_comma)
+        if item_result.success:
+            return item_result
+
+        # A comma was consumed but nothing parseable follows it.
+        raise Exception('Trailing comma before "{}"'.format(after_comma))
+
+    return wrapped
+
+def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster):
+    """Build a parser for a delimited, comma-separated sequence of items.
+
+    The returned parser consumes `start_wrap`, then zero or more items parsed
+    by `wrapped_parser` with a comma between consecutive items, then `end_wrap`.
+    On success the collected item values are passed to `typecaster` as a list
+    and its return value becomes the value of the _shared.ParseResult. If
+    either delimiter is missing, _shared._FAILED_PARSE_RESULT is returned. A
+    comma that is not followed by an item raises Exception (raised by the
+    parser built with _prefix_with_comma).
+    """
+    parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser)
+    start_wrap_parser = _make_consume_constant_parser(start_wrap)
+    end_wrap_parser = _make_consume_constant_parser(end_wrap)
+
+    def parser(s):
+        result = start_wrap_parser(s)
+        if not result.success:
+            return _shared._FAILED_PARSE_RESULT
+        s = result.remaining
+
+        value = []
+
+        # The first item has no leading comma; every later item must be preceded by one.
+        parse_result = wrapped_parser(s)
+
+        while parse_result.success:
+            value.append(parse_result.value)
+            s = parse_result.remaining
+            parse_result = parser_prefixed_with_comma(s)
+
+        result = end_wrap_parser(s)
+        if not result.success:
+            return _shared._FAILED_PARSE_RESULT
+
+        return _shared.ParseResult(
+            success = True,
+            value = typecaster(value),
+            remaining = result.remaining,
+        )
+
+    return parser
+
+# _PARSERS is defined further down the module. It is only looked up when
+# _object_parser is called, so the function can be defined here and used in the
+# definition of _list_parser.
+def _object_parser(source):
+    """Return the first successful result from trying each parser in _PARSERS.
+
+    Parsers are tried in list order on the same `source`; if none succeeds,
+    _shared._FAILED_PARSE_RESULT is returned.
+    """
+    # Generator expressions keep this lazy: parsers after the first success never run.
+    attempts = (parser(source) for parser in _PARSERS)
+    return next(
+        (attempt for attempt in attempts if attempt.success),
+        _shared._FAILED_PARSE_RESULT,
+    )
+
+# Parses '[' item, item, ... ']' into a list; each item is parsed by _object_parser.
+_list_parser = _comma_separate_and_wrap(_object_parser, '[', ']', list)
+
+# Consumes a single ':' (after skipping leading whitespace) and produces no value.
+_consume_colon_parser = _make_consume_constant_parser(':')
+
+def _kvp_parser(s):
+    """Parse one `key : value` pair.
+
+    Key and value are each parsed with _object_parser, separated by a colon.
+    On success the ParseResult value is the tuple (key, value) and remaining is
+    the input left after the value. If any of the three steps fails,
+    _shared._FAILED_PARSE_RESULT is returned.
+    """
+    key_result = _object_parser(s)
+    if not key_result.success:
+        return _shared._FAILED_PARSE_RESULT
+
+    colon_result = _consume_colon_parser(key_result.remaining)
+    if not colon_result.success:
+        return _shared._FAILED_PARSE_RESULT
+
+    value_result = _object_parser(colon_result.remaining)
+    if not value_result.success:
+        return _shared._FAILED_PARSE_RESULT
+
+    pair = (key_result.value, value_result.value)
+    return _shared.ParseResult(
+        success = True,
+        value = pair,
+        remaining = value_result.remaining,
+    )
+
+# Parses '{' key: value, ... '}'; the list of (key, value) tuples produced by
+# _kvp_parser is passed to collections.OrderedDict.
+_dictionary_parser = _comma_separate_and_wrap(_kvp_parser, '{', '}', collections.OrderedDict)
 
 
 _PARSERS = [
@@ -148,17 +288,13 @@ _PARSERS = [
     _binary32_parser,
     _binary64_parser,
     _binary_parser,
+    _make_utf_parser('utf8'),
+    _make_utf_parser('utf16'),
+    _make_utf_parser('utf32'),
+    _list_parser,
+    _dictionary_parser,
 ]
 
-def _object_parser(source):
-    for parser in _PARSERS:
-        result = parser(source)
-
-        if result.success:
-            return result
-
-    return _shared._FAILED_PARSE_RESULT
-
 def _parse(parser, source):
     result = parser(source)