"Dual Object Notation" -> "Twofold Object Notation"
author David Kerkeslager <kerkeslager@gmail.com>
Thu, 4 May 2017 02:48:25 +0000 (22:48 -0400)
committer David Kerkeslager <kerkeslager@gmail.com>
Thu, 4 May 2017 02:48:25 +0000 (22:48 -0400)
15 files changed:
README.rst
don/__init__.py [deleted file]
don/_shared.py [deleted file]
don/binary.py [deleted file]
don/string.py [deleted file]
don/tags.py [deleted file]
setup.py
test_binary.py
test_string.py
test_tags.py
ton/__init__.py [new file with mode: 0644]
ton/_shared.py [new file with mode: 0644]
ton/binary.py [new file with mode: 0644]
ton/string.py [new file with mode: 0644]
ton/tags.py [new file with mode: 0644]

diff --git a/README.rst b/README.rst
index 809a325..4a841d6 100644 (file)
@@ -1,2 +1 @@
-# don
-Dual Object Notation
+# TON: Twofold Object Notation
diff --git a/don/__init__.py b/don/__init__.py
deleted file mode 100644 (file)
index 6fa0d33..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-import collections
-import struct
-
-from don import binary
-from don import string
-
-def binary_to_string(b):
-    return string.serialize(binary.deserialize(b))
-
-def string_to_binary(s):
-    return binary.serialize(string.deserialize(s))
diff --git a/don/_shared.py b/don/_shared.py
deleted file mode 100644 (file)
index 9ce9ca4..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-import collections
-
-from don import tags
-
-ParseResult = collections.namedtuple(
-    'ParseResult',
-    [
-        'success',
-        'value',
-        'remaining',
-    ],
-)
-
-_FAILED_PARSE_RESULT = ParseResult(success = False, value = None, remaining = None)
diff --git a/don/binary.py b/don/binary.py
deleted file mode 100644 (file)
index 79bdff2..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-import collections
-import struct
-
-from don import tags, _shared
-
-def _binary_serialize_tag_only_type(o):
-    return b''
-
-def _pack_format_string_to_binary_serializer(pfs):
-    def serializer(i):
-        return struct.pack(pfs, i)
-    return serializer
-
-def _encoder_to_binary_serializer(e):
-    def serializer(s):
-        encoded = e(s)
-        return struct.pack('!I', len(encoded)) + encoded
-    return serializer
-
-def _binary_serialize_list(items):
-    # TODO Enforce that items are all the same type
-    items = [tags._tag(i) for i in items]
-
-    if len(items) == 0:
-        item_tag = tags.VOID
-    else:
-        item_tag = items[0].tag
-
-    item_serializer = _BINARY_SERIALIZERS[item_tag]
-    items = [item_serializer(i.value) for i in items]
-    item_length = len(items)
-    items = b''.join(items)
-    byte_length = len(items)
-    return struct.pack('!BII', item_tag, byte_length, item_length) + items
-
-def _serialize_key(o):
-    o = tags.autotag(o)
-    assert o.tag in tags.STRING_TAGS
-    return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value)
-
-def _binary_serialize_dict(d):
-    item_length = 0
-    serialized = b''
-
-    key_serializer = _BINARY_SERIALIZERS[tags.UTF8]
-
-    for key, value in d.items():
-        item_length += 1
-        serialized += _serialize_key(key) + serialize(value)
-
-    byte_length = len(serialized)
-    return struct.pack('!II', byte_length, item_length) + serialized
-
-_BINARY_SERIALIZERS = {
-    tags.VOID: _binary_serialize_tag_only_type,
-    tags.TRUE: _binary_serialize_tag_only_type,
-    tags.FALSE: _binary_serialize_tag_only_type,
-    tags.INT8: _pack_format_string_to_binary_serializer('!b'),
-    tags.INT16: _pack_format_string_to_binary_serializer('!h'),
-    tags.INT32: _pack_format_string_to_binary_serializer('!i'),
-    tags.BINARY: _encoder_to_binary_serializer(lambda b: b),
-    tags.UTF8: _encoder_to_binary_serializer(lambda s: s.encode('utf-8')),
-    tags.UTF16: _encoder_to_binary_serializer(lambda s: s.encode('utf-16')),
-    tags.UTF32: _encoder_to_binary_serializer(lambda s: s.encode('utf-32')),
-    tags.LIST: _binary_serialize_list,
-    tags.DICTIONARY: _binary_serialize_dict,
-}
-
-def serialize(o):
-    o = tags.autotag(o)
-    return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value)
-
-_BYTE_SIZES_TO_UNPACK_FORMATS = {
-    1: '!b',
-    2: '!h',
-    4: '!i',
-    8: '!q',
-}
-
-def make_integer_parser(size_in_bytes):
-    unpack_format = _BYTE_SIZES_TO_UNPACK_FORMATS[size_in_bytes]
-
-    def integer_parser(source):
-        value = struct.unpack(unpack_format, source[:size_in_bytes])[0]
-        remaining = source[size_in_bytes:]
-
-        return _shared.ParseResult(success = True, value = value, remaining = remaining)
-
-    return integer_parser
-
-def binary64_parser(source):
-    return _shared.ParseResult(
-        success = True,
-        value = struct.unpack('!d', source[:8])[0],
-        remaining = source[8:],
-    )
-
-def make_string_parser(decoder):
-    def string_parser(source):
-        length = struct.unpack('!I', source[:4])[0]
-        source = source[4:]
-        return _shared.ParseResult(
-            success = True,
-            value = decoder(source[:length]),
-            remaining = source[length:],
-        )
-
-    return string_parser
-
-def _list_parser(source):
-    tag = source[0]
-    parser = _TAGS_TO_PARSERS[tag]
-
-    source = source[1:]
-    byte_length, items_length = struct.unpack('!II', source[:8])
-    source = source[8:]
-
-    remaining = source[byte_length:]
-    source = source[:byte_length]
-
-    def item_iterator(source):
-        count = 0
-
-        while len(source) > 0:
-            parse_result = parser(source)
-
-            if parse_result.success:
-                count += 1
-                yield parse_result.value
-                source = parse_result.remaining
-
-        assert count == items_length
-    
-    return _shared.ParseResult(
-        success = True,
-        value = item_iterator(source),
-        remaining = remaining,
-    )
-
-def dictionary_parser(source):
-    key_parser = _TAGS_TO_PARSERS[tags.UTF8]
-
-    byte_length, item_length = struct.unpack('!II', source[:8])
-    source = source[8:]
-
-    remaining = source[byte_length:]
-    source = source[:byte_length]
-
-    def kvp_iterator(source):
-        count = 0
-
-        while len(source) > 0:
-            count += 1
-            key_parse_result = key_parser(source)
-            key, source = key_parse_result.value, key_parse_result.remaining
-            value_parse_result = _object_parser(source)
-            value, source = value_parse_result.value, value_parse_result.remaining
-
-            yield key, value
-
-        assert count == item_length
-
-    return _shared.ParseResult(
-        success = True,
-        value = collections.OrderedDict(kvp_iterator(source)),
-        remaining = remaining,
-    )
-
-
-_TAGS_TO_PARSERS = {
-    tags.VOID: lambda r: _shared.ParseResult(True, None, r),
-    tags.TRUE: lambda r: _shared.ParseResult(True, True, r),
-    tags.FALSE: lambda r: _shared.ParseResult(True, False, r),
-    tags.INT8: make_integer_parser(1),
-    tags.INT16: make_integer_parser(2),
-    tags.INT32: make_integer_parser(4),
-    tags.INT64: make_integer_parser(8),
-    tags.BINARY: make_string_parser(lambda b : b),
-    tags.UTF8: make_string_parser(lambda b : b.decode('utf-8')),
-    tags.UTF16: make_string_parser(lambda b : b.decode('utf-16')),
-    tags.UTF32: make_string_parser(lambda b : b.decode('utf-32')),
-    tags.LIST: _list_parser,
-    tags.DICTIONARY: dictionary_parser,
-}
-
-def _object_parser(source):
-    return _TAGS_TO_PARSERS[source[0]](source[1:])
-
-def _parse(parser, source):
-    result = parser(source)
-
-    if result.success and result.remaining == b'':
-        return result.value
-
-    raise Exception('Unparsed trailing bytes: {}'.format(result.remaining))
-
-def deserialize(b):
-    return _parse(_object_parser, b)
diff --git a/don/string.py b/don/string.py
deleted file mode 100644 (file)
index a65a260..0000000
+++ /dev/null
@@ -1,269 +0,0 @@
-import binascii
-import collections
-import functools
-import re
-
-from don import tags, _shared
-
-def _integer_size_to_string_serializer(integer_size):
-    minimum = -(2 ** (integer_size - 1))
-    maximum = 2 ** (integer_size - 1) - 1
-    
-    def serializer(integer):
-        assert minimum <= integer and integer <= maximum
-        return '{}i{}'.format(integer, integer_size)
-
-    return serializer
-
-def _serialize_binary(b):
-    return '"{}"b'.format(binascii.hexlify(b).decode('ascii'))
-
-def _utf_encoding_to_serializer(utf_encoding):
-    def serializer(s):
-        return '"{}"{}'.format(s, utf_encoding)
-
-    return serializer
-
-def _string_serialize_list(l):
-    return '[{}]'.format(', '.join(map(serialize, l)))
-
-def _string_serialize_dictionary(d):
-    def serialize_kvp(kvp):
-        return serialize(kvp[0]) + ': ' + serialize(kvp[1])
-    return '{ ' + ', '.join(map(serialize_kvp, d.items())) + ' }'
-
-_STRING_SERIALIZERS = {
-    tags.VOID: lambda o: 'null',
-    tags.TRUE: lambda o: 'true',
-    tags.FALSE: lambda o: 'false',
-    tags.INT8: _integer_size_to_string_serializer(8),
-    tags.INT16: _integer_size_to_string_serializer(16),
-    tags.INT32: _integer_size_to_string_serializer(32),
-    tags.INT64: _integer_size_to_string_serializer(64),
-    tags.BINARY: _serialize_binary,
-    tags.UTF8: _utf_encoding_to_serializer('utf8'),
-    tags.UTF16: _utf_encoding_to_serializer('utf16'),
-    tags.UTF32: _utf_encoding_to_serializer('utf32'),
-    tags.LIST: _string_serialize_list,
-    tags.DICTIONARY: _string_serialize_dictionary,
-}
-
-def serialize(o):
-    o = tags.autotag(o)
-    
-    return _STRING_SERIALIZERS[o.tag](o.value)
-
-def _consume_leading_whitespace(wrapped_parser):
-    @functools.wraps(wrapped_parser)
-    def parser(s):
-        s = s.lstrip()
-        return wrapped_parser(s)
-
-    return parser
-
-def _make_constant_parser(constant, value):
-    @_consume_leading_whitespace
-    def constant_parser(s):
-        if s.startswith(constant):
-            result = _shared.ParseResult(
-                success = True,
-                value = value,
-                remaining = s[len(constant):],
-            )
-            return result
-
-        return _shared._FAILED_PARSE_RESULT
-
-    return constant_parser
-
-def _make_integer_parser(width):
-    matcher = re.compile(r'(-?\d+)i' + str(width))
-
-    @_consume_leading_whitespace
-    def integer_parser(s):
-        match = matcher.match(s)
-
-        if match:
-            # TODO Validate that the integer is in range
-            return _shared.ParseResult(
-                success = True,
-                value = int(match.group(1)),
-                remaining = s[match.end():],
-            )
-
-        return _shared._FAILED_PARSE_RESULT
-
-    return integer_parser
-
-_BINARY_MATCHER = re.compile(r'"([\da-f]*)"b')
-
-@_consume_leading_whitespace
-def _binary_parser(s):
-    match = _BINARY_MATCHER.match(s)
-
-    if match:
-        return _shared.ParseResult(
-            success = True,
-            value = binascii.unhexlify(match.group(1)),
-            remaining = s[match.end():],
-        )
-
-    return _shared._FAILED_PARSE_RESULT
-
-def _make_utf_parser(encoding):
-    matcher = re.compile(r'"(.*?)"' + encoding)
-
-    @_consume_leading_whitespace
-    def utf_parser(s):
-        match = matcher.match(s)
-
-        if match:
-            return _shared.ParseResult(
-                success = True,
-                value = match.group(1),
-                remaining = s[match.end():],
-            )
-
-        return _shared._FAILED_PARSE_RESULT
-
-    return utf_parser
-
-def _make_consume_constant_parser(constant):
-    @_consume_leading_whitespace
-    def consume_character_parser(s):
-        if s.startswith(constant):
-            return _shared.ParseResult(
-                success = True,
-                value = None,
-                remaining = s[len(constant):],
-            )
-        return _shared._FAILED_PARSE_RESULT
-
-    return consume_character_parser
-
-_consume_comma_parser = _make_consume_constant_parser(',')
-
-def _prefix_with_comma(parser):
-    def wrapped(s):
-        result = _consume_comma_parser(s)
-        if result.success:
-            s = result.remaining
-        else:
-            return _shared._FAILED_PARSE_RESULT
-
-        result = parser(s)
-        if not result.success:
-            raise Exception('Trailing comma before "{}"'.format(s))
-
-        return result
-
-    return wrapped
-
-def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster):
-    parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser)
-    start_wrap_parser = _make_consume_constant_parser(start_wrap)
-    end_wrap_parser = _make_consume_constant_parser(end_wrap)
-
-    def parser(s):
-        result = start_wrap_parser(s)
-        if result.success:
-            s = result.remaining
-        else:
-            return _shared._FAILED_PARSE_RESULT
-
-        value = []
-        first = True
-
-        parse_result = wrapped_parser(s)
-
-        while parse_result.success:
-            value.append(parse_result.value)
-            s = parse_result.remaining
-            parse_result = parser_prefixed_with_comma(s)
-
-        result = end_wrap_parser(s)
-        if result.success:
-            s = result.remaining
-        else:
-            return _shared._FAILED_PARSE_RESULT
-
-        return _shared.ParseResult(
-            success = True,
-            value = typecaster(value),
-            remaining = s,
-        )
-
-    return parser
-
-# This uses _PARSERS which has not been defined yet, but is defined here so it can be used in
-# the definition of _list_parser
-def _object_parser(source):
-    for parser in _PARSERS:
-        result = parser(source)
-
-        if result.success:
-            return result
-
-    return _shared._FAILED_PARSE_RESULT
-
-_list_parser = _comma_separate_and_wrap(_object_parser, '[', ']', list)
-
-_consume_colon_parser = _make_consume_constant_parser(':')
-
-def _kvp_parser(s):
-    key_parse_result = _object_parser(s)
-    if key_parse_result.success:
-        s = key_parse_result.remaining
-    else:
-        return _shared._FAILED_PARSE_RESULT
-
-    result = _consume_colon_parser(s)
-    if result.success:
-        s = result.remaining
-    else:
-        return _shared._FAILED_PARSE_RESULT
-
-    value_parse_result = _object_parser(s)
-    if value_parse_result.success:
-        s = value_parse_result.remaining
-    else:
-        return _shared._FAILED_PARSE_RESULT
-
-    return _shared.ParseResult(
-        success = True,
-        value = (key_parse_result.value, value_parse_result.value),
-        remaining = s,
-    )
-
-_dictionary_parser = _comma_separate_and_wrap(_kvp_parser, '{', '}', collections.OrderedDict)
-
-
-_PARSERS = [
-    _make_constant_parser('null', None),
-    _make_constant_parser('true', True),
-    _make_constant_parser('false', False),
-    _make_integer_parser(8),
-    _make_integer_parser(16),
-    _make_integer_parser(32),
-    _make_integer_parser(64),
-    _binary_parser,
-    _make_utf_parser('utf8'),
-    _make_utf_parser('utf16'),
-    _make_utf_parser('utf32'),
-    _list_parser,
-    _dictionary_parser,
-]
-
-def _parse(parser, source):
-    result = parser(source)
-
-    if result.success:
-        if result.remaining.strip() == '':
-            return result.value
-
-        raise Exception('Unparsed trailing characters: "{}"'.format(result.remaining))
-
-    raise Exception('Unable to parse: "{}"'.format(source))
-
-def deserialize(s):
-    return _parse(_object_parser, s)
diff --git a/don/tags.py b/don/tags.py
deleted file mode 100644 (file)
index db54dfe..0000000
+++ /dev/null
@@ -1,138 +0,0 @@
-import collections
-
-VOID = 0x00
-TRUE = 0x01
-FALSE = 0x02
-BOOL = (TRUE, FALSE)
-INT8 = 0x10
-INT16 = 0x11
-INT32 = 0x12
-INT64 = 0x13
-# These are to be supported in the future
-# FLOAT = 0x20
-# DOUBLE = 0x21
-BINARY = 0x30
-UTF8 = 0x31
-UTF16 = 0x32
-UTF32 = 0x33
-LIST = 0x40
-DICTIONARY = 0x41
-
-STRING_TAGS = set([UTF8, UTF16, UTF32])
-
-DEFAULT_INTEGER_ENCODING = INT32
-DEFAULT_STRING_ENCODING = UTF8
-
-TaggedObject = collections.namedtuple('TaggedObject', ['tag', 'value'])
-
-_TYPES_TO_TAGS = {
-    int: DEFAULT_INTEGER_ENCODING,
-    bytes: BINARY,
-    str: DEFAULT_STRING_ENCODING,
-    list: LIST,
-    dict: DICTIONARY,
-    collections.OrderedDict: DICTIONARY,
-}
-
-def _tag(o):
-    if isinstance(o, TaggedObject):
-        return o
-
-    if o is None:
-        return TaggedObject(tag = VOID, value = o)
-
-    if o is True:
-        return TaggedObject(tag = TRUE, value = o)
-
-    if o is False:
-        return TaggedObject(tag = FALSE, value = o)
-
-    return TaggedObject(tag = _TYPES_TO_TAGS[type(o)], value = o)
-
-_NONE = TaggedObject(tag = VOID, value = None)
-_TRUE = TaggedObject(tag = TRUE, value = True)
-_FALSE = TaggedObject(tag = FALSE, value = False)
-
-_TAGS_TO_IN_RANGE_PREDICATES = collections.OrderedDict([
-    (INT8,  lambda i: -128 <= i and i <= 127),
-    (INT16, lambda i: -32768 <= i and i <= 32767),
-    (INT32, lambda i: -2147483648 <= i and i <= 2147483647),
-    (INT64, lambda i: -9223372036854775808 <= i and i <= 9223372036854775807),
-])
-
-class TooWideError(Exception):
-    pass
-
-SMALLEST = object()
-
-def autotag(o, **kwargs):
-    preferred_integer_tag = kwargs.pop('preferred_integer_tag', DEFAULT_INTEGER_ENCODING)
-    preferred_string_tag = kwargs.pop('preferred_string_tag', DEFAULT_STRING_ENCODING)
-
-    if kwargs:
-        raise TypeError("autotag() got an unexpected keyword argument '{}'".format(
-            list(kwargs.keys())[0],
-        ))
-
-    if isinstance(o, TaggedObject):
-        return o
-
-    if o is None:
-        return _NONE
-
-    if o is True:
-        return _TRUE
-
-    if o is False:
-        return _FALSE
-
-    if isinstance(o, int):
-        if preferred_integer_tag is not SMALLEST and _TAGS_TO_IN_RANGE_PREDICATES[preferred_integer_tag](o):
-            return TaggedObject(tag = preferred_integer_tag, value = o)
-
-        else:
-            for tag, in_range_predicate in _TAGS_TO_IN_RANGE_PREDICATES.items():
-                if in_range_predicate(o):
-                    return TaggedObject(tag = tag, value = o)
-
-            raise TooWideError("Integer {} is too wide to be serialized")
-
-    if isinstance(o, str):
-        # TODO Support SMALLEST for preferred string tag
-        return TaggedObject(tag = preferred_string_tag, value = o)
-
-    if isinstance(o, bytes):
-        return TaggedObject(tag = BINARY, value = o)
-
-    if isinstance(o, list):
-        return TaggedObject(
-            tag = LIST,
-            value = [
-                autotag(
-                    i,
-                    preferred_integer_tag = preferred_integer_tag,
-                    preferred_string_tag = preferred_string_tag,
-                ) for i in o
-            ],
-        )
-
-    if isinstance(o, dict):
-        return TaggedObject(
-            tag = DICTIONARY,
-            value = collections.OrderedDict([
-                (
-                    autotag(
-                        key,
-                        preferred_integer_tag = preferred_integer_tag,
-                        preferred_string_tag = preferred_string_tag,
-                    ),
-                    autotag(
-                        value,
-                        preferred_integer_tag = preferred_integer_tag,
-                        preferred_string_tag = preferred_string_tag,
-                    ),
-                ) for key, value in o.items()
-            ]),
-        )
-
-    raise Exception('Unsupported type {}'.format(type(o)))
diff --git a/setup.py b/setup.py
index 158bf7a..01dffee 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -9,11 +9,11 @@ with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
     long_description = f.read()
 
 setup(
-    name='don',
+    name='ton',
     version='0.0.1',
     description='A dual-format serialization library and reference implementation for Dual Object Notation',
     long_description=long_description,
-    url='https://github.com/kerkeslager/don',
+    url='https://github.com/kerkeslager/ton',
     author='David Kerkeslager',
     author_email='',
     license='GPL-3.0',
diff --git a/test_binary.py b/test_binary.py
index b75a3d7..932a030 100644 (file)
@@ -2,7 +2,7 @@
 import collections
 import unittest
 
-from don import binary, tags
+from ton import binary, tags
 
 class TestBinarySerialize(unittest.TestCase):
     def test_serializes_null(self):
diff --git a/test_string.py b/test_string.py
index 3a74fdb..7d235de 100644 (file)
@@ -1,7 +1,7 @@
 import collections
 import unittest
 
-from don import string, tags
+from ton import string, tags
 
 class TestStringSerialize(unittest.TestCase):
     def test_serializes_null(self):
diff --git a/test_tags.py b/test_tags.py
index ef48a37..a6afcaf 100644 (file)
@@ -1,7 +1,7 @@
 import collections
 import unittest
 
-from don import tags
+from ton import tags
 
 class AutoTagTests(unittest.TestCase):
     def test_autotags_void(self):
diff --git a/ton/__init__.py b/ton/__init__.py
new file mode 100644 (file)
index 0000000..157ec10
--- /dev/null
@@ -0,0 +1,10 @@
+import collections
+import struct
+
+from ton import binary, string
+
+def binary_to_string(b):
+    return string.serialize(binary.deserialize(b))
+
+def string_to_binary(s):
+    return binary.serialize(string.deserialize(s))
diff --git a/ton/_shared.py b/ton/_shared.py
new file mode 100644 (file)
index 0000000..c0ce786
--- /dev/null
@@ -0,0 +1,14 @@
+import collections
+
+from ton import tags
+
+ParseResult = collections.namedtuple(
+    'ParseResult',
+    [
+        'success',
+        'value',
+        'remaining',
+    ],
+)
+
+_FAILED_PARSE_RESULT = ParseResult(success = False, value = None, remaining = None)
diff --git a/ton/binary.py b/ton/binary.py
new file mode 100644 (file)
index 0000000..08f541a
--- /dev/null
@@ -0,0 +1,198 @@
+import collections
+import struct
+
+from ton import tags, _shared
+
+def _binary_serialize_tag_only_type(o):
+    return b''
+
+def _pack_format_string_to_binary_serializer(pfs):
+    def serializer(i):
+        return struct.pack(pfs, i)
+    return serializer
+
+def _encoder_to_binary_serializer(e):
+    def serializer(s):
+        encoded = e(s)
+        return struct.pack('!I', len(encoded)) + encoded
+    return serializer
+
+def _binary_serialize_list(items):
+    # TODO Enforce that items are all the same type
+    items = [tags._tag(i) for i in items]
+
+    if len(items) == 0:
+        item_tag = tags.VOID
+    else:
+        item_tag = items[0].tag
+
+    item_serializer = _BINARY_SERIALIZERS[item_tag]
+    items = [item_serializer(i.value) for i in items]
+    item_length = len(items)
+    items = b''.join(items)
+    byte_length = len(items)
+    return struct.pack('!BII', item_tag, byte_length, item_length) + items
+
+def _serialize_key(o):
+    o = tags.autotag(o)
+    assert o.tag in tags.STRING_TAGS
+    return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value)
+
+def _binary_serialize_dict(d):
+    item_length = 0
+    serialized = b''
+
+    key_serializer = _BINARY_SERIALIZERS[tags.UTF8]
+
+    for key, value in d.items():
+        item_length += 1
+        serialized += _serialize_key(key) + serialize(value)
+
+    byte_length = len(serialized)
+    return struct.pack('!II', byte_length, item_length) + serialized
+
+_BINARY_SERIALIZERS = {
+    tags.VOID: _binary_serialize_tag_only_type,
+    tags.TRUE: _binary_serialize_tag_only_type,
+    tags.FALSE: _binary_serialize_tag_only_type,
+    tags.INT8: _pack_format_string_to_binary_serializer('!b'),
+    tags.INT16: _pack_format_string_to_binary_serializer('!h'),
+    tags.INT32: _pack_format_string_to_binary_serializer('!i'),
+    tags.BINARY: _encoder_to_binary_serializer(lambda b: b),
+    tags.UTF8: _encoder_to_binary_serializer(lambda s: s.encode('utf-8')),
+    tags.UTF16: _encoder_to_binary_serializer(lambda s: s.encode('utf-16')),
+    tags.UTF32: _encoder_to_binary_serializer(lambda s: s.encode('utf-32')),
+    tags.LIST: _binary_serialize_list,
+    tags.DICTIONARY: _binary_serialize_dict,
+}
+
+def serialize(o):
+    o = tags.autotag(o)
+    return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value)
+
+_BYTE_SIZES_TO_UNPACK_FORMATS = {
+    1: '!b',
+    2: '!h',
+    4: '!i',
+    8: '!q',
+}
+
+def make_integer_parser(size_in_bytes):
+    unpack_format = _BYTE_SIZES_TO_UNPACK_FORMATS[size_in_bytes]
+
+    def integer_parser(source):
+        value = struct.unpack(unpack_format, source[:size_in_bytes])[0]
+        remaining = source[size_in_bytes:]
+
+        return _shared.ParseResult(success = True, value = value, remaining = remaining)
+
+    return integer_parser
+
+def binary64_parser(source):
+    return _shared.ParseResult(
+        success = True,
+        value = struct.unpack('!d', source[:8])[0],
+        remaining = source[8:],
+    )
+
+def make_string_parser(decoder):
+    def string_parser(source):
+        length = struct.unpack('!I', source[:4])[0]
+        source = source[4:]
+        return _shared.ParseResult(
+            success = True,
+            value = decoder(source[:length]),
+            remaining = source[length:],
+        )
+
+    return string_parser
+
+def _list_parser(source):
+    tag = source[0]
+    parser = _TAGS_TO_PARSERS[tag]
+
+    source = source[1:]
+    byte_length, items_length = struct.unpack('!II', source[:8])
+    source = source[8:]
+
+    remaining = source[byte_length:]
+    source = source[:byte_length]
+
+    def item_iterator(source):
+        count = 0
+
+        while len(source) > 0:
+            parse_result = parser(source)
+
+            if parse_result.success:
+                count += 1
+                yield parse_result.value
+                source = parse_result.remaining
+
+        assert count == items_length
+    
+    return _shared.ParseResult(
+        success = True,
+        value = item_iterator(source),
+        remaining = remaining,
+    )
+
+def dictionary_parser(source):
+    key_parser = _TAGS_TO_PARSERS[tags.UTF8]
+
+    byte_length, item_length = struct.unpack('!II', source[:8])
+    source = source[8:]
+
+    remaining = source[byte_length:]
+    source = source[:byte_length]
+
+    def kvp_iterator(source):
+        count = 0
+
+        while len(source) > 0:
+            count += 1
+            key_parse_result = key_parser(source)
+            key, source = key_parse_result.value, key_parse_result.remaining
+            value_parse_result = _object_parser(source)
+            value, source = value_parse_result.value, value_parse_result.remaining
+
+            yield key, value
+
+        assert count == item_length
+
+    return _shared.ParseResult(
+        success = True,
+        value = collections.OrderedDict(kvp_iterator(source)),
+        remaining = remaining,
+    )
+
+
+_TAGS_TO_PARSERS = {
+    tags.VOID: lambda r: _shared.ParseResult(True, None, r),
+    tags.TRUE: lambda r: _shared.ParseResult(True, True, r),
+    tags.FALSE: lambda r: _shared.ParseResult(True, False, r),
+    tags.INT8: make_integer_parser(1),
+    tags.INT16: make_integer_parser(2),
+    tags.INT32: make_integer_parser(4),
+    tags.INT64: make_integer_parser(8),
+    tags.BINARY: make_string_parser(lambda b : b),
+    tags.UTF8: make_string_parser(lambda b : b.decode('utf-8')),
+    tags.UTF16: make_string_parser(lambda b : b.decode('utf-16')),
+    tags.UTF32: make_string_parser(lambda b : b.decode('utf-32')),
+    tags.LIST: _list_parser,
+    tags.DICTIONARY: dictionary_parser,
+}
+
+def _object_parser(source):
+    return _TAGS_TO_PARSERS[source[0]](source[1:])
+
+def _parse(parser, source):
+    result = parser(source)
+
+    if result.success and result.remaining == b'':
+        return result.value
+
+    raise Exception('Unparsed trailing bytes: {}'.format(result.remaining))
+
+def deserialize(b):
+    return _parse(_object_parser, b)
diff --git a/ton/string.py b/ton/string.py
new file mode 100644 (file)
index 0000000..7cec429
--- /dev/null
@@ -0,0 +1,269 @@
+import binascii
+import collections
+import functools
+import re
+
+from ton import tags, _shared
+
+def _integer_size_to_string_serializer(integer_size):
+    minimum = -(2 ** (integer_size - 1))
+    maximum = 2 ** (integer_size - 1) - 1
+    
+    def serializer(integer):
+        assert minimum <= integer and integer <= maximum
+        return '{}i{}'.format(integer, integer_size)
+
+    return serializer
+
+def _serialize_binary(b):
+    return '"{}"b'.format(binascii.hexlify(b).decode('ascii'))
+
+def _utf_encoding_to_serializer(utf_encoding):
+    def serializer(s):
+        return '"{}"{}'.format(s, utf_encoding)
+
+    return serializer
+
+def _string_serialize_list(l):
+    return '[{}]'.format(', '.join(map(serialize, l)))
+
+def _string_serialize_dictionary(d):
+    def serialize_kvp(kvp):
+        return serialize(kvp[0]) + ': ' + serialize(kvp[1])
+    return '{ ' + ', '.join(map(serialize_kvp, d.items())) + ' }'
+
+_STRING_SERIALIZERS = {
+    tags.VOID: lambda o: 'null',
+    tags.TRUE: lambda o: 'true',
+    tags.FALSE: lambda o: 'false',
+    tags.INT8: _integer_size_to_string_serializer(8),
+    tags.INT16: _integer_size_to_string_serializer(16),
+    tags.INT32: _integer_size_to_string_serializer(32),
+    tags.INT64: _integer_size_to_string_serializer(64),
+    tags.BINARY: _serialize_binary,
+    tags.UTF8: _utf_encoding_to_serializer('utf8'),
+    tags.UTF16: _utf_encoding_to_serializer('utf16'),
+    tags.UTF32: _utf_encoding_to_serializer('utf32'),
+    tags.LIST: _string_serialize_list,
+    tags.DICTIONARY: _string_serialize_dictionary,
+}
+
+def serialize(o):
+    o = tags.autotag(o)
+    
+    return _STRING_SERIALIZERS[o.tag](o.value)
+
+def _consume_leading_whitespace(wrapped_parser):
+    @functools.wraps(wrapped_parser)
+    def parser(s):
+        s = s.lstrip()
+        return wrapped_parser(s)
+
+    return parser
+
+def _make_constant_parser(constant, value):
+    @_consume_leading_whitespace
+    def constant_parser(s):
+        if s.startswith(constant):
+            result = _shared.ParseResult(
+                success = True,
+                value = value,
+                remaining = s[len(constant):],
+            )
+            return result
+
+        return _shared._FAILED_PARSE_RESULT
+
+    return constant_parser
+
+def _make_integer_parser(width):
+    matcher = re.compile(r'(-?\d+)i' + str(width))
+
+    @_consume_leading_whitespace
+    def integer_parser(s):
+        match = matcher.match(s)
+
+        if match:
+            # TODO Validate that the integer is in range
+            return _shared.ParseResult(
+                success = True,
+                value = int(match.group(1)),
+                remaining = s[match.end():],
+            )
+
+        return _shared._FAILED_PARSE_RESULT
+
+    return integer_parser
+
+_BINARY_MATCHER = re.compile(r'"([\da-f]*)"b')
+
+@_consume_leading_whitespace
+def _binary_parser(s):
+    match = _BINARY_MATCHER.match(s)
+
+    if match:
+        return _shared.ParseResult(
+            success = True,
+            value = binascii.unhexlify(match.group(1)),
+            remaining = s[match.end():],
+        )
+
+    return _shared._FAILED_PARSE_RESULT
+
+def _make_utf_parser(encoding):
+    matcher = re.compile(r'"(.*?)"' + encoding)
+
+    @_consume_leading_whitespace
+    def utf_parser(s):
+        match = matcher.match(s)
+
+        if match:
+            return _shared.ParseResult(
+                success = True,
+                value = match.group(1),
+                remaining = s[match.end():],
+            )
+
+        return _shared._FAILED_PARSE_RESULT
+
+    return utf_parser
+
+def _make_consume_constant_parser(constant):
+    @_consume_leading_whitespace
+    def consume_character_parser(s):
+        if s.startswith(constant):
+            return _shared.ParseResult(
+                success = True,
+                value = None,
+                remaining = s[len(constant):],
+            )
+        return _shared._FAILED_PARSE_RESULT
+
+    return consume_character_parser
+
+_consume_comma_parser = _make_consume_constant_parser(',')
+
+def _prefix_with_comma(parser):
+    def wrapped(s):
+        result = _consume_comma_parser(s)
+        if result.success:
+            s = result.remaining
+        else:
+            return _shared._FAILED_PARSE_RESULT
+
+        result = parser(s)
+        if not result.success:
+            raise Exception('Trailing comma before "{}"'.format(s))
+
+        return result
+
+    return wrapped
+
+def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster):
+    parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser)
+    start_wrap_parser = _make_consume_constant_parser(start_wrap)
+    end_wrap_parser = _make_consume_constant_parser(end_wrap)
+
+    def parser(s):
+        result = start_wrap_parser(s)
+        if result.success:
+            s = result.remaining
+        else:
+            return _shared._FAILED_PARSE_RESULT
+
+        value = []
+        first = True
+
+        parse_result = wrapped_parser(s)
+
+        while parse_result.success:
+            value.append(parse_result.value)
+            s = parse_result.remaining
+            parse_result = parser_prefixed_with_comma(s)
+
+        result = end_wrap_parser(s)
+        if result.success:
+            s = result.remaining
+        else:
+            return _shared._FAILED_PARSE_RESULT
+
+        return _shared.ParseResult(
+            success = True,
+            value = typecaster(value),
+            remaining = s,
+        )
+
+    return parser
+
+# This uses _PARSERS which has not been defined yet, but is defined here so it can be used in
+# the definition of _list_parser
+def _object_parser(source):
+    for parser in _PARSERS:
+        result = parser(source)
+
+        if result.success:
+            return result
+
+    return _shared._FAILED_PARSE_RESULT
+
+_list_parser = _comma_separate_and_wrap(_object_parser, '[', ']', list)
+
+_consume_colon_parser = _make_consume_constant_parser(':')
+
+def _kvp_parser(s):
+    key_parse_result = _object_parser(s)
+    if key_parse_result.success:
+        s = key_parse_result.remaining
+    else:
+        return _shared._FAILED_PARSE_RESULT
+
+    result = _consume_colon_parser(s)
+    if result.success:
+        s = result.remaining
+    else:
+        return _shared._FAILED_PARSE_RESULT
+
+    value_parse_result = _object_parser(s)
+    if value_parse_result.success:
+        s = value_parse_result.remaining
+    else:
+        return _shared._FAILED_PARSE_RESULT
+
+    return _shared.ParseResult(
+        success = True,
+        value = (key_parse_result.value, value_parse_result.value),
+        remaining = s,
+    )
+
+_dictionary_parser = _comma_separate_and_wrap(_kvp_parser, '{', '}', collections.OrderedDict)
+
+
+_PARSERS = [
+    _make_constant_parser('null', None),
+    _make_constant_parser('true', True),
+    _make_constant_parser('false', False),
+    _make_integer_parser(8),
+    _make_integer_parser(16),
+    _make_integer_parser(32),
+    _make_integer_parser(64),
+    _binary_parser,
+    _make_utf_parser('utf8'),
+    _make_utf_parser('utf16'),
+    _make_utf_parser('utf32'),
+    _list_parser,
+    _dictionary_parser,
+]
+
+def _parse(parser, source):
+    result = parser(source)
+
+    if result.success:
+        if result.remaining.strip() == '':
+            return result.value
+
+        raise Exception('Unparsed trailing characters: "{}"'.format(result.remaining))
+
+    raise Exception('Unable to parse: "{}"'.format(source))
+
+def deserialize(s):
+    return _parse(_object_parser, s)
diff --git a/ton/tags.py b/ton/tags.py
new file mode 100644 (file)
index 0000000..db54dfe
--- /dev/null
@@ -0,0 +1,138 @@
+import collections
+
+# Type tags: one integer per TON type. The high nibble groups related types:
+# 0x0_ void and booleans, 0x1_ integers, 0x2_ floats (reserved, see below),
+# 0x3_ binary data and strings, 0x4_ containers.
+VOID = 0x00
+TRUE = 0x01
+FALSE = 0x02
+# Both boolean tags, grouped as a tuple.
+BOOL = (TRUE, FALSE)
+INT8 = 0x10
+INT16 = 0x11
+INT32 = 0x12
+INT64 = 0x13
+# These are to be supported in the future
+# FLOAT = 0x20
+# DOUBLE = 0x21
+BINARY = 0x30
+UTF8 = 0x31
+UTF16 = 0x32
+UTF32 = 0x33
+LIST = 0x40
+DICTIONARY = 0x41
+
+# The tags that denote text strings; BINARY is not included.
+STRING_TAGS = set([UTF8, UTF16, UTF32])
+
+# Tags autotag() uses when the caller states no preference; _TYPES_TO_TAGS
+# also maps plain int and str to them.
+DEFAULT_INTEGER_ENCODING = INT32
+DEFAULT_STRING_ENCODING = UTF8
+
+# A value paired with the tag that names its TON type.
+TaggedObject = collections.namedtuple('TaggedObject', ['tag', 'value'])
+
+# Tag assigned by _tag() to a value of each Python type. _tag() indexes this
+# with type(o), i.e. by exact type, so a subclass is only recognised if it is
+# listed here itself (as collections.OrderedDict is). None, True and False
+# never reach this table: _tag() handles them before the lookup.
+_TYPES_TO_TAGS = {
+    int: DEFAULT_INTEGER_ENCODING,
+    bytes: BINARY,
+    str: DEFAULT_STRING_ENCODING,
+    list: LIST,
+    dict: DICTIONARY,
+    collections.OrderedDict: DICTIONARY,
+}
+
+def _tag(o):
+    """Wrap o in a TaggedObject, without descending into containers.
+
+    A TaggedObject is returned as is. None, True and False receive the
+    VOID, TRUE and FALSE tags; every other value receives the tag that
+    _TYPES_TO_TAGS lists for its exact type.
+
+    Raises KeyError when type(o) is not a key of _TYPES_TO_TAGS.
+    """
+    if isinstance(o, TaggedObject):
+        return o
+
+    # The singletons are matched by identity, so 1 and 0 are not mistaken
+    # for True and False.
+    if o is None:
+        chosen = VOID
+    elif o is True:
+        chosen = TRUE
+    elif o is False:
+        chosen = FALSE
+    else:
+        chosen = _TYPES_TO_TAGS[type(o)]
+
+    return TaggedObject(chosen, o)
+
+# Prebuilt TaggedObjects that autotag() returns for None, True and False.
+_NONE = TaggedObject(tag = VOID, value = None)
+_TRUE = TaggedObject(tag = TRUE, value = True)
+_FALSE = TaggedObject(tag = FALSE, value = False)
+
+# For each integer tag, a predicate telling whether an integer lies within
+# that tag's signed two's-complement range. Entries are ordered from
+# narrowest to widest, so iterating the mapping finds the smallest tag that
+# fits first.
+_TAGS_TO_IN_RANGE_PREDICATES = collections.OrderedDict([
+    (INT8,  lambda i: -2**7 <= i <= 2**7 - 1),
+    (INT16, lambda i: -2**15 <= i <= 2**15 - 1),
+    (INT32, lambda i: -2**31 <= i <= 2**31 - 1),
+    (INT64, lambda i: -2**63 <= i <= 2**63 - 1),
+])
+
+class TooWideError(Exception):
+    """Raised by autotag() for an integer outside every supported integer range."""
+    pass
+
+# Sentinel accepted as autotag()'s preferred_integer_tag: pick the narrowest
+# integer tag that fits each integer. It is compared by identity.
+SMALLEST = object()
+
+def autotag(o, **kwargs):
+    """Recursively wrap a plain Python object in TaggedObjects.
+
+    Keyword arguments:
+        preferred_integer_tag: INT8, INT16, INT32, INT64 or SMALLEST
+            (default DEFAULT_INTEGER_ENCODING). An integer receives this
+            tag when it fits its range; otherwise, or when SMALLEST is
+            given, it receives the narrowest integer tag that fits.
+        preferred_string_tag: tag given to every str (default
+            DEFAULT_STRING_ENCODING).
+
+    Returns:
+        A TaggedObject. An object that already is a TaggedObject is
+        returned unchanged and is not descended into. List items, and both
+        the keys and the values of dictionaries, are tagged with the same
+        preferences; dictionaries become OrderedDicts in the iteration
+        order of o.items().
+
+    Raises:
+        TypeError: an unknown keyword argument was passed.
+        KeyError: preferred_integer_tag is neither SMALLEST nor one of the
+            integer tags, and an integer is encountered.
+        TooWideError: an integer does not fit even INT64.
+        Exception: o (or something inside it) has an unsupported type.
+    """
+    preferred_integer_tag = kwargs.pop('preferred_integer_tag', DEFAULT_INTEGER_ENCODING)
+    preferred_string_tag = kwargs.pop('preferred_string_tag', DEFAULT_STRING_ENCODING)
+
+    if kwargs:
+        raise TypeError("autotag() got an unexpected keyword argument '{}'".format(
+            next(iter(kwargs)),
+        ))
+
+    # Forwarded verbatim to every recursive call so nested values follow the
+    # same preferences as the top-level object.
+    preferences = {
+        'preferred_integer_tag': preferred_integer_tag,
+        'preferred_string_tag': preferred_string_tag,
+    }
+
+    if isinstance(o, TaggedObject):
+        return o
+
+    if o is None:
+        return _NONE
+
+    # Booleans must be handled before the int branch: bool is a subclass of
+    # int, so isinstance(True, int) holds.
+    if o is True:
+        return _TRUE
+
+    if o is False:
+        return _FALSE
+
+    if isinstance(o, int):
+        if preferred_integer_tag is not SMALLEST and _TAGS_TO_IN_RANGE_PREDICATES[preferred_integer_tag](o):
+            return TaggedObject(tag = preferred_integer_tag, value = o)
+
+        # Either SMALLEST was requested or the preferred tag is too narrow:
+        # take the first (narrowest) tag whose range contains the value.
+        for tag, in_range_predicate in _TAGS_TO_IN_RANGE_PREDICATES.items():
+            if in_range_predicate(o):
+                return TaggedObject(tag = tag, value = o)
+
+        # The offending value is interpolated into the message; previously
+        # the placeholder was left unformatted.
+        raise TooWideError("Integer {} is too wide to be serialized".format(o))
+
+    if isinstance(o, str):
+        # TODO Support SMALLEST for preferred string tag
+        return TaggedObject(tag = preferred_string_tag, value = o)
+
+    if isinstance(o, bytes):
+        return TaggedObject(tag = BINARY, value = o)
+
+    if isinstance(o, list):
+        return TaggedObject(
+            tag = LIST,
+            value = [autotag(item, **preferences) for item in o],
+        )
+
+    if isinstance(o, dict):
+        return TaggedObject(
+            tag = DICTIONARY,
+            value = collections.OrderedDict([
+                (autotag(key, **preferences), autotag(value, **preferences))
+                for key, value in o.items()
+            ]),
+        )
+
+    raise Exception('Unsupported type {}'.format(type(o)))