Implemented dictionary parsing
[ton] / don / string.py
index ee925f6..e22b328 100644 (file)
@@ -1,4 +1,5 @@
 import binascii
+import collections
 import re
 
 from don import tags, _shared
@@ -98,7 +99,7 @@ def _binary32_parser(s):
     match = _BINARY32_MATCHER.match(s)
 
     if match:
-        # TODO Validate that the double is in range
+        # TODO Validate that the float is in range
         return _shared.ParseResult(
             success = True,
             value = float(match.group(1)),
@@ -126,7 +127,6 @@ def _binary_parser(s):
     match = _BINARY_MATCHER.match(s)
 
     if match:
-        # TODO Validate that the double is in range
         return _shared.ParseResult(
             success = True,
             value = binascii.unhexlify(match.group(1)),
@@ -135,6 +135,108 @@ def _binary_parser(s):
 
     return _shared._FAILED_PARSE_RESULT
 
+def _make_utf_parser(encoding):
+    """Build a parser for a double-quoted string tagged with ``encoding``.
+
+    The returned parser matches, at the very start of its input, a pair of
+    double quotes immediately followed by the literal ``encoding`` text
+    (e.g. ``"abc"utf8``). The quoted text is matched non-greedily and cannot
+    span a newline, because ``.`` is used without ``re.DOTALL``.
+
+    On a match the result carries the text between the quotes exactly as
+    written (no unescaping or re-encoding is done here) and the input that
+    follows the encoding suffix. Otherwise the shared failed result is
+    returned.
+    """
+    pattern = re.compile(r'"(.*?)"' + encoding)
+
+    def parser(s):
+        found = pattern.match(s)
+        if found is None:
+            return _shared._FAILED_PARSE_RESULT
+
+        quoted_text = found.group(1)
+        rest = s[found.end():]
+
+        return _shared.ParseResult(
+            success = True,
+            value = quoted_text,
+            remaining = rest,
+        )
+
+    return parser
+
+def _prefix_with_comma(parser):
+    """Wrap ``parser`` so that it only applies after a leading comma.
+
+    The wrapped parser returns the shared failed result when its input does
+    not start with ``,``. When it does, the comma is dropped and ``parser``
+    is run on the rest; a successful result is passed through unchanged.
+
+    Raises:
+        Exception: if a comma is present but ``parser`` fails on the text
+            after it, i.e. the comma is not followed by another item.
+    """
+    def wrapped(s):
+        if not s.startswith(','):
+            return _shared._FAILED_PARSE_RESULT
+
+        after_comma = s[1:]
+        outcome = parser(after_comma)
+
+        if outcome.success:
+            return outcome
+
+        # A comma promises another item, so failing here is a hard error
+        raise Exception('Trailing comma before "{}"'.format(after_comma))
+
+    return wrapped
+
+def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster):
+    """Build a parser for a delimited, comma-separated sequence of items.
+
+    Args:
+        wrapped_parser: parser used for each item of the sequence.
+        start_wrap: text that must open the sequence (e.g. ``'['``).
+        end_wrap: text that must close the sequence (e.g. ``']'``).
+        typecaster: callable applied to the list of item values to produce
+            the final value (e.g. ``list``).
+
+    Returns:
+        A parser that yields the shared failed result when the input does not
+        start with ``start_wrap`` or when the items are not followed by
+        ``end_wrap``; otherwise a successful result whose value is
+        ``typecaster(items)`` and whose remaining text follows ``end_wrap``.
+        An empty sequence (delimiters with nothing between them) is accepted.
+
+    Raises:
+        Exception: propagated from the comma-prefixed item parser when a
+            comma is not followed by a parsable item.
+    """
+    parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser)
+
+    def parser(s):
+        if not s.startswith(start_wrap):
+            return _shared._FAILED_PARSE_RESULT
+
+        # Strip the whole delimiter rather than a single character, so the
+        # slice always agrees with the startswith() check above
+        s = s[len(start_wrap):]
+
+        value = []
+
+        # The first item has no leading comma; every later item must have one
+        parse_result = wrapped_parser(s)
+
+        while parse_result.success:
+            value.append(parse_result.value)
+            s = parse_result.remaining
+            parse_result = parser_prefixed_with_comma(s)
+
+        if not s.startswith(end_wrap):
+            return _shared._FAILED_PARSE_RESULT
+
+        return _shared.ParseResult(
+            success = True,
+            value = typecaster(value),
+            remaining = s[len(end_wrap):],
+        )
+
+    return parser
+
+# _PARSERS is only assigned further down the module. That is fine because the
+# name is looked up when _object_parser is called, not when it is defined; the
+# function has to live up here so that _list_parser can be built from it.
+def _object_parser(source):
+    """Return the result of the first parser in _PARSERS that succeeds.
+
+    Parsers are tried in list order. When none succeeds, the shared failed
+    result is returned.
+    """
+    for attempt in _PARSERS:
+        outcome = attempt(source)
+
+        if outcome.success:
+            break
+    else:
+        # Loop ran to completion: no parser accepted the input
+        outcome = _shared._FAILED_PARSE_RESULT
+
+    return outcome
+
+# Parser for a '[' ... ']' wrapped, comma-separated sequence of objects; the
+# parsed item values are collected into a list.
+_list_parser = _comma_separate_and_wrap(_object_parser, '[', ']', list)
+
+def _kvp_parser(s):
+    """Parse one ``key:value`` pair.
+
+    Both the key and the value are parsed with _object_parser, and they must
+    be separated by a single ``:`` with nothing in between. On success the
+    value of the result is the tuple ``(key, value)`` and the remaining text
+    is whatever follows the value. If the key, the colon or the value is
+    missing, the shared failed result is returned.
+    """
+    key_outcome = _object_parser(s)
+    if not key_outcome.success:
+        return _shared._FAILED_PARSE_RESULT
+
+    after_key = key_outcome.remaining
+    if not after_key.startswith(':'):
+        return _shared._FAILED_PARSE_RESULT
+
+    # Skip the ':' separator before parsing the value
+    value_outcome = _object_parser(after_key[1:])
+    if not value_outcome.success:
+        return _shared._FAILED_PARSE_RESULT
+
+    pair = (key_outcome.value, value_outcome.value)
+
+    return _shared.ParseResult(
+        success = True,
+        value = pair,
+        remaining = value_outcome.remaining,
+    )
+
+# Parser for a '{' ... '}' wrapped, comma-separated sequence of key:value
+# pairs; the (key, value) tuples are fed to OrderedDict in the order parsed.
+# NOTE(review): keys come from _object_parser, so a list or dictionary key
+# would presumably reach OrderedDict as an unhashable value - confirm whether
+# that should be rejected earlier.
+_dictionary_parser = _comma_separate_and_wrap(_kvp_parser, '{', '}', collections.OrderedDict)
 
 
 _PARSERS = [
@@ -148,17 +250,13 @@ _PARSERS = [
     _binary32_parser,
     _binary64_parser,
     _binary_parser,
+    _make_utf_parser('utf8'),
+    _make_utf_parser('utf16'),
+    _make_utf_parser('utf32'),
+    _list_parser,
+    _dictionary_parser,
 ]
 
-def _object_parser(source):
-    for parser in _PARSERS:
-        result = parser(source)
-
-        if result.success:
-            return result
-
-    return _shared._FAILED_PARSE_RESULT
-
 def _parse(parser, source):
     result = parser(source)