From: David Kerkeslager Date: Thu, 4 May 2017 02:48:25 +0000 (-0400) Subject: "Dual Object Notation" -> "Twofold Object Notation" X-Git-Url: https://code.kerkeslager.com/?p=ton;a=commitdiff_plain;h=6102d28307a3efe75f1d9cb6b317c37eaa0d0907 "Dual Object Notation" -> "Twofold Object Notation" --- diff --git a/README.rst b/README.rst index 809a325..4a841d6 100644 --- a/README.rst +++ b/README.rst @@ -1,2 +1 @@ -# don -Dual Object Notation +# TON: Twofold Object Notation diff --git a/don/__init__.py b/don/__init__.py deleted file mode 100644 index 6fa0d33..0000000 --- a/don/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -import collections -import struct - -from don import binary -from don import string - -def binary_to_string(b): - return string.serialize(binary.deserialize(b)) - -def string_to_binary(s): - return binary.serialize(string.deserialize(s)) diff --git a/don/_shared.py b/don/_shared.py deleted file mode 100644 index 9ce9ca4..0000000 --- a/don/_shared.py +++ /dev/null @@ -1,14 +0,0 @@ -import collections - -from don import tags - -ParseResult = collections.namedtuple( - 'ParseResult', - [ - 'success', - 'value', - 'remaining', - ], -) - -_FAILED_PARSE_RESULT = ParseResult(success = False, value = None, remaining = None) diff --git a/don/binary.py b/don/binary.py deleted file mode 100644 index 79bdff2..0000000 --- a/don/binary.py +++ /dev/null @@ -1,198 +0,0 @@ -import collections -import struct - -from don import tags, _shared - -def _binary_serialize_tag_only_type(o): - return b'' - -def _pack_format_string_to_binary_serializer(pfs): - def serializer(i): - return struct.pack(pfs, i) - return serializer - -def _encoder_to_binary_serializer(e): - def serializer(s): - encoded = e(s) - return struct.pack('!I', len(encoded)) + encoded - return serializer - -def _binary_serialize_list(items): - # TODO Enforce that items are all the same type - items = [tags._tag(i) for i in items] - - if len(items) == 0: - item_tag = tags.VOID - else: - item_tag = items[0].tag - - item_serializer = _BINARY_SERIALIZERS[item_tag] - items = [item_serializer(i.value) for i in items] - item_length = len(items) - items = b''.join(items) - byte_length = len(items) - return struct.pack('!BII', item_tag, byte_length, item_length) + items - -def _serialize_key(o): - o = tags.autotag(o) - assert o.tag in tags.STRING_TAGS - return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value) - -def _binary_serialize_dict(d): - item_length = 0 - serialized = b'' - - key_serializer = _BINARY_SERIALIZERS[tags.UTF8] - - for key, value in d.items(): - item_length += 1 - serialized += _serialize_key(key) + serialize(value) - - byte_length = len(serialized) - return struct.pack('!II', byte_length, item_length) + serialized - -_BINARY_SERIALIZERS = { - tags.VOID: _binary_serialize_tag_only_type, - tags.TRUE: _binary_serialize_tag_only_type, - tags.FALSE: _binary_serialize_tag_only_type, - tags.INT8: _pack_format_string_to_binary_serializer('!b'), - tags.INT16: _pack_format_string_to_binary_serializer('!h'), - tags.INT32: _pack_format_string_to_binary_serializer('!i'), - tags.BINARY: _encoder_to_binary_serializer(lambda b: b), - tags.UTF8: _encoder_to_binary_serializer(lambda s: s.encode('utf-8')), - tags.UTF16: _encoder_to_binary_serializer(lambda s: s.encode('utf-16')), - tags.UTF32: _encoder_to_binary_serializer(lambda s: s.encode('utf-32')), - tags.LIST: _binary_serialize_list, - tags.DICTIONARY: _binary_serialize_dict, -} - -def serialize(o): - o = tags.autotag(o) - return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value) - -_BYTE_SIZES_TO_UNPACK_FORMATS = { - 1: '!b', - 2: '!h', - 4: '!i', - 8: '!q', -} - -def make_integer_parser(size_in_bytes): - unpack_format = _BYTE_SIZES_TO_UNPACK_FORMATS[size_in_bytes] - - def integer_parser(source): - value = struct.unpack(unpack_format, source[:size_in_bytes])[0] - remaining = source[size_in_bytes:] - - return _shared.ParseResult(success = True, value = value, remaining = remaining) - - return integer_parser - -def binary64_parser(source): - return _shared.ParseResult( - success = True, - value = struct.unpack('!d', source[:8])[0], - remaining = source[8:], - ) - -def make_string_parser(decoder): - def string_parser(source): - length = struct.unpack('!I', source[:4])[0] - source = source[4:] - return _shared.ParseResult( - success = True, - value = decoder(source[:length]), - remaining = source[length:], - ) - - return string_parser - -def _list_parser(source): - tag = source[0] - parser = _TAGS_TO_PARSERS[tag] - - source = source[1:] - byte_length, items_length = struct.unpack('!II', source[:8]) - source = source[8:] - - remaining = source[byte_length:] - source = source[:byte_length] - - def item_iterator(source): - count = 0 - - while len(source) > 0: - parse_result = parser(source) - - if parse_result.success: - count += 1 - yield parse_result.value - source = parse_result.remaining - - assert count == items_length - - return _shared.ParseResult( - success = True, - value = item_iterator(source), - remaining = remaining, - ) - -def dictionary_parser(source): - key_parser = _TAGS_TO_PARSERS[tags.UTF8] - - byte_length, item_length = struct.unpack('!II', source[:8]) - source = source[8:] - - remaining = source[byte_length:] - source = source[:byte_length] - - def kvp_iterator(source): - count = 0 - - while len(source) > 0: - count += 1 - key_parse_result = key_parser(source) - key, source = key_parse_result.value, key_parse_result.remaining - value_parse_result = _object_parser(source) - value, source = value_parse_result.value, value_parse_result.remaining - - yield key, value - - assert count == item_length - - return _shared.ParseResult( - success = True, - value = collections.OrderedDict(kvp_iterator(source)), - remaining = remaining, - ) - - -_TAGS_TO_PARSERS = { - tags.VOID: lambda r: _shared.ParseResult(True, None, r), - tags.TRUE: lambda r: _shared.ParseResult(True, True, r), - tags.FALSE: lambda r: _shared.ParseResult(True, False, r), - tags.INT8: make_integer_parser(1), - tags.INT16: make_integer_parser(2), - tags.INT32: make_integer_parser(4), - tags.INT64: make_integer_parser(8), - tags.BINARY: make_string_parser(lambda b : b), - tags.UTF8: make_string_parser(lambda b : b.decode('utf-8')), - tags.UTF16: make_string_parser(lambda b : b.decode('utf-16')), - tags.UTF32: make_string_parser(lambda b : b.decode('utf-32')), - tags.LIST: _list_parser, - tags.DICTIONARY: dictionary_parser, -} - -def _object_parser(source): - return _TAGS_TO_PARSERS[source[0]](source[1:]) - -def _parse(parser, source): - result = parser(source) - - if result.success and result.remaining == b'': - return result.value - - raise Exception('Unparsed trailing bytes: {}'.format(result.remaining)) - -def deserialize(b): - return _parse(_object_parser, b) diff --git a/don/string.py b/don/string.py deleted file mode 100644 index a65a260..0000000 --- a/don/string.py +++ /dev/null @@ -1,269 +0,0 @@ -import binascii -import collections -import functools -import re - -from don import tags, _shared - -def _integer_size_to_string_serializer(integer_size): - minimum = -(2 ** (integer_size - 1)) - maximum = 2 ** (integer_size - 1) - 1 - - def serializer(integer): - assert minimum <= integer and integer <= maximum - return '{}i{}'.format(integer, integer_size) - - return serializer - -def _serialize_binary(b): - return '"{}"b'.format(binascii.hexlify(b).decode('ascii')) - -def _utf_encoding_to_serializer(utf_encoding): - def serializer(s): - return '"{}"{}'.format(s, utf_encoding) - - return serializer - -def _string_serialize_list(l): - return '[{}]'.format(', '.join(map(serialize, l))) - -def _string_serialize_dictionary(d): - def serialize_kvp(kvp): - return serialize(kvp[0]) + ': ' + serialize(kvp[1]) - return '{ ' + ', '.join(map(serialize_kvp, d.items())) + ' }' - -_STRING_SERIALIZERS = { - tags.VOID: lambda o: 'null', - tags.TRUE: lambda o: 'true', - tags.FALSE: lambda o: 'false', - tags.INT8: _integer_size_to_string_serializer(8), - tags.INT16: _integer_size_to_string_serializer(16), - tags.INT32: _integer_size_to_string_serializer(32), - tags.INT64: _integer_size_to_string_serializer(64), - tags.BINARY: _serialize_binary, - tags.UTF8: _utf_encoding_to_serializer('utf8'), - tags.UTF16: _utf_encoding_to_serializer('utf16'), - tags.UTF32: _utf_encoding_to_serializer('utf32'), - tags.LIST: _string_serialize_list, - tags.DICTIONARY: _string_serialize_dictionary, -} - -def serialize(o): - o = tags.autotag(o) - - return _STRING_SERIALIZERS[o.tag](o.value) - -def _consume_leading_whitespace(wrapped_parser): - @functools.wraps(wrapped_parser) - def parser(s): - s = s.lstrip() - return wrapped_parser(s) - - return parser - -def _make_constant_parser(constant, value): - @_consume_leading_whitespace - def constant_parser(s): - if s.startswith(constant): - result = _shared.ParseResult( - success = True, - value = value, - remaining = s[len(constant):], - ) - return result - - return _shared._FAILED_PARSE_RESULT - - return constant_parser - -def _make_integer_parser(width): - matcher = re.compile(r'(-?\d+)i' + str(width)) - - @_consume_leading_whitespace - def integer_parser(s): - match = matcher.match(s) - - if match: - # TODO Validate that the integer is in range - return _shared.ParseResult( - success = True, - value = int(match.group(1)), - remaining = s[match.end():], - ) - - return _shared._FAILED_PARSE_RESULT - - return integer_parser - -_BINARY_MATCHER = re.compile(r'"([\da-f]*)"b') - -@_consume_leading_whitespace -def _binary_parser(s): - match = _BINARY_MATCHER.match(s) - - if match: - return _shared.ParseResult( - success = True, - value = binascii.unhexlify(match.group(1)), - remaining = s[match.end():], - ) - - return _shared._FAILED_PARSE_RESULT - -def _make_utf_parser(encoding): - matcher = re.compile(r'"(.*?)"' + encoding) - - @_consume_leading_whitespace - def utf_parser(s): - match = matcher.match(s) - - if match: - return _shared.ParseResult( - success = True, - value = match.group(1), - remaining = s[match.end():], - ) - - return _shared._FAILED_PARSE_RESULT - - return utf_parser - -def _make_consume_constant_parser(constant): - @_consume_leading_whitespace - def consume_character_parser(s): - if s.startswith(constant): - return _shared.ParseResult( - success = True, - value = None, - remaining = s[len(constant):], - ) - return _shared._FAILED_PARSE_RESULT - - return consume_character_parser - -_consume_comma_parser = _make_consume_constant_parser(',') - -def _prefix_with_comma(parser): - def wrapped(s): - result = _consume_comma_parser(s) - if result.success: - s = result.remaining - else: - return _shared._FAILED_PARSE_RESULT - - result = parser(s) - if not result.success: - raise Exception('Trailing comma before "{}"'.format(s)) - - return result - - return wrapped - -def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster): - parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser) - start_wrap_parser = _make_consume_constant_parser(start_wrap) - end_wrap_parser = _make_consume_constant_parser(end_wrap) - - def parser(s): - result = start_wrap_parser(s) - if result.success: - s = result.remaining - else: - return _shared._FAILED_PARSE_RESULT - - value = [] - first = True - - parse_result = wrapped_parser(s) - - while parse_result.success: - value.append(parse_result.value) - s = parse_result.remaining - parse_result = parser_prefixed_with_comma(s) - - result = end_wrap_parser(s) - if result.success: - s = result.remaining - else: - return _shared._FAILED_PARSE_RESULT - - return _shared.ParseResult( - success = True, - value = typecaster(value), - remaining = s, - ) - - return parser - -# This uses _PARSERS which has not been defined yet, but is defined here so it can be used in -# the definition of _list_parser -def _object_parser(source): - for parser in _PARSERS: - result = parser(source) - - if result.success: - return result - - return _shared._FAILED_PARSE_RESULT - -_list_parser = _comma_separate_and_wrap(_object_parser, '[', ']', list) - -_consume_colon_parser = _make_consume_constant_parser(':') - -def _kvp_parser(s): - key_parse_result = _object_parser(s) - if key_parse_result.success: - s = key_parse_result.remaining - else: - return _shared._FAILED_PARSE_RESULT - - result = _consume_colon_parser(s) - if result.success: - s = result.remaining - else: - return _shared._FAILED_PARSE_RESULT - - value_parse_result = _object_parser(s) - if value_parse_result.success: - s = value_parse_result.remaining - else: - return _shared._FAILED_PARSE_RESULT - - return _shared.ParseResult( - success = True, - value = (key_parse_result.value, value_parse_result.value), - remaining = s, - ) - -_dictionary_parser = _comma_separate_and_wrap(_kvp_parser, '{', '}', collections.OrderedDict) - - -_PARSERS = [ - _make_constant_parser('null', None), - _make_constant_parser('true', True), - _make_constant_parser('false', False), - _make_integer_parser(8), - _make_integer_parser(16), - _make_integer_parser(32), - _make_integer_parser(64), - _binary_parser, - _make_utf_parser('utf8'), - _make_utf_parser('utf16'), - _make_utf_parser('utf32'), - _list_parser, - _dictionary_parser, -] - -def _parse(parser, source): - result = parser(source) - - if result.success: - if result.remaining.strip() == '': - return result.value - - raise Exception('Unparsed trailing characters: "{}"'.format(result.remaining)) - - raise Exception('Unable to parse: "{}"'.format(source)) - -def deserialize(s): - return _parse(_object_parser, s) diff --git a/don/tags.py b/don/tags.py deleted file mode 100644 index db54dfe..0000000 --- a/don/tags.py +++ /dev/null @@ -1,138 +0,0 @@ -import collections - -VOID = 0x00 -TRUE = 0x01 -FALSE = 0x02 -BOOL = (TRUE, FALSE) -INT8 = 0x10 -INT16 = 0x11 -INT32 = 0x12 -INT64 = 0x13 -# These are to be supported in the future -# FLOAT = 0x20 -# DOUBLE = 0x21 -BINARY = 0x30 -UTF8 = 0x31 -UTF16 = 0x32 -UTF32 = 0x33 -LIST = 0x40 -DICTIONARY = 0x41 - -STRING_TAGS = set([UTF8, UTF16, UTF32]) - -DEFAULT_INTEGER_ENCODING = INT32 -DEFAULT_STRING_ENCODING = UTF8 - -TaggedObject = collections.namedtuple('TaggedObject', ['tag', 'value']) - -_TYPES_TO_TAGS = { - int: DEFAULT_INTEGER_ENCODING, - bytes: BINARY, - str: DEFAULT_STRING_ENCODING, - list: LIST, - dict: DICTIONARY, - collections.OrderedDict: DICTIONARY, -} - -def _tag(o): - if isinstance(o, TaggedObject): - return o - - if o is None: - return TaggedObject(tag = VOID, value = o) - - if o is True: - return TaggedObject(tag = TRUE, value = o) - - if o is False: - return TaggedObject(tag = FALSE, value = o) - - return TaggedObject(tag = _TYPES_TO_TAGS[type(o)], value = o) - -_NONE = TaggedObject(tag = VOID, value = None) -_TRUE = TaggedObject(tag = TRUE, value = True) -_FALSE = TaggedObject(tag = FALSE, value = False) - -_TAGS_TO_IN_RANGE_PREDICATES = collections.OrderedDict([ - (INT8, lambda i: -128 <= i and i <= 127), - (INT16, lambda i: -32768 <= i and i <= 32767), - (INT32, lambda i: -2147483648 <= i and i <= 2147483647), - (INT64, lambda i: -9223372036854775808 <= i and i <= 9223372036854775807), -]) - -class TooWideError(Exception): - pass - -SMALLEST = object() - -def autotag(o, **kwargs): - preferred_integer_tag = kwargs.pop('preferred_integer_tag', DEFAULT_INTEGER_ENCODING) - preferred_string_tag = kwargs.pop('preferred_string_tag', DEFAULT_STRING_ENCODING) - - if kwargs: - raise TypeError("autotag() got an unexpected keyword argument '{}'".format( - list(kwargs.keys())[0], - )) - - if isinstance(o, TaggedObject): - return o - - if o is None: - return _NONE - - if o is True: - return _TRUE - - if o is False: - return _FALSE - - if isinstance(o, int): - if preferred_integer_tag is not SMALLEST and _TAGS_TO_IN_RANGE_PREDICATES[preferred_integer_tag](o): - return TaggedObject(tag = preferred_integer_tag, value = o) - - else: - for tag, in_range_predicate in _TAGS_TO_IN_RANGE_PREDICATES.items(): - if in_range_predicate(o): - return TaggedObject(tag = tag, value = o) - - raise TooWideError("Integer {} is too wide to be serialized") - - if isinstance(o, str): - # TODO Support SMALLEST for preferred string tag - return TaggedObject(tag = preferred_string_tag, value = o) - - if isinstance(o, bytes): - return TaggedObject(tag = BINARY, value = o) - - if isinstance(o, list): - return TaggedObject( - tag = LIST, - value = [ - autotag( - i, - preferred_integer_tag = preferred_integer_tag, - preferred_string_tag = preferred_string_tag, - ) for i in o - ], - ) - - if isinstance(o, dict): - return TaggedObject( - tag = DICTIONARY, - value = collections.OrderedDict([ - ( - autotag( - key, - preferred_integer_tag = preferred_integer_tag, - preferred_string_tag = preferred_string_tag, - ), - autotag( - value, - preferred_integer_tag = preferred_integer_tag, - preferred_string_tag = preferred_string_tag, - ), - ) for key, value in o.items() - ]), - ) - - raise Exception('Unsupported type {}'.format(type(o))) diff --git a/setup.py b/setup.py index 158bf7a..01dffee 100644 --- a/setup.py +++ b/setup.py @@ -9,11 +9,11 @@ with open(path.join(here, 'README.rst'), encoding='utf-8') as f: long_description = f.read() setup( - name='don', + name='ton', version='0.0.1', description='A dual-format serialization library and reference implementation for Dual Object Notation', long_description=long_description, - url='https://github.com/kerkeslager/don', + url='https://github.com/kerkeslager/ton', author='David Kerkeslager', author_email='', license='GPL-3.0', diff --git a/test_binary.py b/test_binary.py index b75a3d7..932a030 100644 --- a/test_binary.py +++ b/test_binary.py @@ -2,7 +2,7 @@ import collections import unittest -from don import binary, tags +from ton import binary, tags class TestBinarySerialize(unittest.TestCase): def test_serializes_null(self): diff --git a/test_string.py b/test_string.py index 3a74fdb..7d235de 100644 --- a/test_string.py +++ b/test_string.py @@ -1,7 +1,7 @@ import collections import unittest -from don import string, tags +from ton import string, tags class TestStringSerialize(unittest.TestCase): def test_serializes_null(self): diff --git a/test_tags.py b/test_tags.py index ef48a37..a6afcaf 100644 --- a/test_tags.py +++ b/test_tags.py @@ -1,7 +1,7 @@ import collections import unittest -from don import tags +from ton import tags class AutoTagTests(unittest.TestCase): def test_autotags_void(self): diff --git a/ton/__init__.py b/ton/__init__.py new file mode 100644 index 0000000..157ec10 --- /dev/null +++ b/ton/__init__.py @@ -0,0 +1,10 @@ +import collections +import struct + +from ton import binary, string + +def binary_to_string(b): + return string.serialize(binary.deserialize(b)) + +def string_to_binary(s): + return binary.serialize(string.deserialize(s)) diff --git a/ton/_shared.py b/ton/_shared.py new file mode 100644 index 0000000..c0ce786 --- /dev/null +++ b/ton/_shared.py @@ -0,0 +1,14 @@ +import collections + +from ton import tags + +ParseResult = collections.namedtuple( + 'ParseResult', + [ + 'success', + 'value', + 'remaining', + ], +) + +_FAILED_PARSE_RESULT = ParseResult(success = False, value = None, remaining = None) diff --git a/ton/binary.py b/ton/binary.py new file mode 100644 index 0000000..08f541a --- /dev/null +++ b/ton/binary.py @@ -0,0 +1,198 @@ +import collections +import struct + +from ton import tags, _shared + +def _binary_serialize_tag_only_type(o): + return b'' + +def _pack_format_string_to_binary_serializer(pfs): + def serializer(i): + return struct.pack(pfs, i) + return serializer + +def _encoder_to_binary_serializer(e): + def serializer(s): + encoded = e(s) + return struct.pack('!I', len(encoded)) + encoded + return serializer + +def _binary_serialize_list(items): + # TODO Enforce that items are all the same type + items = [tags._tag(i) for i in items] + + if len(items) == 0: + item_tag = tags.VOID + else: + item_tag = items[0].tag + + item_serializer = _BINARY_SERIALIZERS[item_tag] + items = [item_serializer(i.value) for i in items] + item_length = len(items) + items = b''.join(items) + byte_length = len(items) + return struct.pack('!BII', item_tag, byte_length, item_length) + items + +def _serialize_key(o): + o = tags.autotag(o) + assert o.tag in tags.STRING_TAGS + return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value) + +def _binary_serialize_dict(d): + item_length = 0 + serialized = b'' + + key_serializer = _BINARY_SERIALIZERS[tags.UTF8] + + for key, value in d.items(): + item_length += 1 + serialized += _serialize_key(key) + serialize(value) + + byte_length = len(serialized) + return struct.pack('!II', byte_length, item_length) + serialized + +_BINARY_SERIALIZERS = { + tags.VOID: _binary_serialize_tag_only_type, + tags.TRUE: _binary_serialize_tag_only_type, + tags.FALSE: _binary_serialize_tag_only_type, + tags.INT8: _pack_format_string_to_binary_serializer('!b'), + tags.INT16: _pack_format_string_to_binary_serializer('!h'), + tags.INT32: _pack_format_string_to_binary_serializer('!i'), + tags.BINARY: _encoder_to_binary_serializer(lambda b: b), + tags.UTF8: _encoder_to_binary_serializer(lambda s: s.encode('utf-8')), + tags.UTF16: _encoder_to_binary_serializer(lambda s: s.encode('utf-16')), + tags.UTF32: _encoder_to_binary_serializer(lambda s: s.encode('utf-32')), + tags.LIST: _binary_serialize_list, + tags.DICTIONARY: _binary_serialize_dict, +} + +def serialize(o): + o = tags.autotag(o) + return struct.pack('!B', o.tag) + _BINARY_SERIALIZERS[o.tag](o.value) + +_BYTE_SIZES_TO_UNPACK_FORMATS = { + 1: '!b', + 2: '!h', + 4: '!i', + 8: '!q', +} + +def make_integer_parser(size_in_bytes): + unpack_format = _BYTE_SIZES_TO_UNPACK_FORMATS[size_in_bytes] + + def integer_parser(source): + value = struct.unpack(unpack_format, source[:size_in_bytes])[0] + remaining = source[size_in_bytes:] + + return _shared.ParseResult(success = True, value = value, remaining = remaining) + + return integer_parser + +def binary64_parser(source): + return _shared.ParseResult( + success = True, + value = struct.unpack('!d', source[:8])[0], + remaining = source[8:], + ) + +def make_string_parser(decoder): + def string_parser(source): + length = struct.unpack('!I', source[:4])[0] + source = source[4:] + return _shared.ParseResult( + success = True, + value = decoder(source[:length]), + remaining = source[length:], + ) + + return string_parser + +def _list_parser(source): + tag = source[0] + parser = _TAGS_TO_PARSERS[tag] + + source = source[1:] + byte_length, items_length = struct.unpack('!II', source[:8]) + source = source[8:] + + remaining = source[byte_length:] + source = source[:byte_length] + + def item_iterator(source): + count = 0 + + while len(source) > 0: + parse_result = parser(source) + + if parse_result.success: + count += 1 + yield parse_result.value + source = parse_result.remaining + + assert count == items_length + + return _shared.ParseResult( + success = True, + value = item_iterator(source), + remaining = remaining, + ) + +def dictionary_parser(source): + key_parser = _TAGS_TO_PARSERS[tags.UTF8] + + byte_length, item_length = struct.unpack('!II', source[:8]) + source = source[8:] + + remaining = source[byte_length:] + source = source[:byte_length] + + def kvp_iterator(source): + count = 0 + + while len(source) > 0: + count += 1 + key_parse_result = key_parser(source) + key, source = key_parse_result.value, key_parse_result.remaining + value_parse_result = _object_parser(source) + value, source = value_parse_result.value, value_parse_result.remaining + + yield key, value + + assert count == item_length + + return _shared.ParseResult( + success = True, + value = collections.OrderedDict(kvp_iterator(source)), + remaining = remaining, + ) + + +_TAGS_TO_PARSERS = { + tags.VOID: lambda r: _shared.ParseResult(True, None, r), + tags.TRUE: lambda r: _shared.ParseResult(True, True, r), + tags.FALSE: lambda r: _shared.ParseResult(True, False, r), + tags.INT8: make_integer_parser(1), + tags.INT16: make_integer_parser(2), + tags.INT32: make_integer_parser(4), + tags.INT64: make_integer_parser(8), + tags.BINARY: make_string_parser(lambda b : b), + tags.UTF8: make_string_parser(lambda b : b.decode('utf-8')), + tags.UTF16: make_string_parser(lambda b : b.decode('utf-16')), + tags.UTF32: make_string_parser(lambda b : b.decode('utf-32')), + tags.LIST: _list_parser, + tags.DICTIONARY: dictionary_parser, +} + +def _object_parser(source): + return _TAGS_TO_PARSERS[source[0]](source[1:]) + +def _parse(parser, source): + result = parser(source) + + if result.success and result.remaining == b'': + return result.value + + raise Exception('Unparsed trailing bytes: {}'.format(result.remaining)) + +def deserialize(b): + return _parse(_object_parser, b) diff --git a/ton/string.py b/ton/string.py new file mode 100644 index 0000000..7cec429 --- /dev/null +++ b/ton/string.py @@ -0,0 +1,269 @@ +import binascii +import collections +import functools +import re + +from ton import tags, _shared + +def _integer_size_to_string_serializer(integer_size): + minimum = -(2 ** (integer_size - 1)) + maximum = 2 ** (integer_size - 1) - 1 + + def serializer(integer): + assert minimum <= integer and integer <= maximum + return '{}i{}'.format(integer, integer_size) + + return serializer + +def _serialize_binary(b): + return '"{}"b'.format(binascii.hexlify(b).decode('ascii')) + +def _utf_encoding_to_serializer(utf_encoding): + def serializer(s): + return '"{}"{}'.format(s, utf_encoding) + + return serializer + +def _string_serialize_list(l): + return '[{}]'.format(', '.join(map(serialize, l))) + +def _string_serialize_dictionary(d): + def serialize_kvp(kvp): + return serialize(kvp[0]) + ': ' + serialize(kvp[1]) + return '{ ' + ', '.join(map(serialize_kvp, d.items())) + ' }' + +_STRING_SERIALIZERS = { + tags.VOID: lambda o: 'null', + tags.TRUE: lambda o: 'true', + tags.FALSE: lambda o: 'false', + tags.INT8: _integer_size_to_string_serializer(8), + tags.INT16: _integer_size_to_string_serializer(16), + tags.INT32: _integer_size_to_string_serializer(32), + tags.INT64: _integer_size_to_string_serializer(64), + tags.BINARY: _serialize_binary, + tags.UTF8: _utf_encoding_to_serializer('utf8'), + tags.UTF16: _utf_encoding_to_serializer('utf16'), + tags.UTF32: _utf_encoding_to_serializer('utf32'), + tags.LIST: _string_serialize_list, + tags.DICTIONARY: _string_serialize_dictionary, +} + +def serialize(o): + o = tags.autotag(o) + + return _STRING_SERIALIZERS[o.tag](o.value) + +def _consume_leading_whitespace(wrapped_parser): + @functools.wraps(wrapped_parser) + def parser(s): + s = s.lstrip() + return wrapped_parser(s) + + return parser + +def _make_constant_parser(constant, value): + @_consume_leading_whitespace + def constant_parser(s): + if s.startswith(constant): + result = _shared.ParseResult( + success = True, + value = value, + remaining = s[len(constant):], + ) + return result + + return _shared._FAILED_PARSE_RESULT + + return constant_parser + +def _make_integer_parser(width): + matcher = re.compile(r'(-?\d+)i' + str(width)) + + @_consume_leading_whitespace + def integer_parser(s): + match = matcher.match(s) + + if match: + # TODO Validate that the integer is in range + return _shared.ParseResult( + success = True, + value = int(match.group(1)), + remaining = s[match.end():], + ) + + return _shared._FAILED_PARSE_RESULT + + return integer_parser + +_BINARY_MATCHER = re.compile(r'"([\da-f]*)"b') + +@_consume_leading_whitespace +def _binary_parser(s): + match = _BINARY_MATCHER.match(s) + + if match: + return _shared.ParseResult( + success = True, + value = binascii.unhexlify(match.group(1)), + remaining = s[match.end():], + ) + + return _shared._FAILED_PARSE_RESULT + +def _make_utf_parser(encoding): + matcher = re.compile(r'"(.*?)"' + encoding) + + @_consume_leading_whitespace + def utf_parser(s): + match = matcher.match(s) + + if match: + return _shared.ParseResult( + success = True, + value = match.group(1), + remaining = s[match.end():], + ) + + return _shared._FAILED_PARSE_RESULT + + return utf_parser + +def _make_consume_constant_parser(constant): + @_consume_leading_whitespace + def consume_character_parser(s): + if s.startswith(constant): + return _shared.ParseResult( + success = True, + value = None, + remaining = s[len(constant):], + ) + return _shared._FAILED_PARSE_RESULT + + return consume_character_parser + +_consume_comma_parser = _make_consume_constant_parser(',') + +def _prefix_with_comma(parser): + def wrapped(s): + result = _consume_comma_parser(s) + if result.success: + s = result.remaining + else: + return _shared._FAILED_PARSE_RESULT + + result = parser(s) + if not result.success: + raise Exception('Trailing comma before "{}"'.format(s)) + + return result + + return wrapped + +def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster): + parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser) + start_wrap_parser = _make_consume_constant_parser(start_wrap) + end_wrap_parser = _make_consume_constant_parser(end_wrap) + + def parser(s): + result = start_wrap_parser(s) + if result.success: + s = result.remaining + else: + return _shared._FAILED_PARSE_RESULT + + value = [] + first = True + + parse_result = wrapped_parser(s) + + while parse_result.success: + value.append(parse_result.value) + s = parse_result.remaining + parse_result = parser_prefixed_with_comma(s) + + result = end_wrap_parser(s) + if result.success: + s = result.remaining + else: + return _shared._FAILED_PARSE_RESULT + + return _shared.ParseResult( + success = True, + value = typecaster(value), + remaining = s, + ) + + return parser + +# This uses _PARSERS which has not been defined yet, but is defined here so it can be used in +# the definition of _list_parser +def _object_parser(source): + for parser in _PARSERS: + result = parser(source) + + if result.success: + return result + + return _shared._FAILED_PARSE_RESULT + +_list_parser = _comma_separate_and_wrap(_object_parser, '[', ']', list) + +_consume_colon_parser = _make_consume_constant_parser(':') + +def _kvp_parser(s): + key_parse_result = _object_parser(s) + if key_parse_result.success: + s = key_parse_result.remaining + else: + return _shared._FAILED_PARSE_RESULT + + result = _consume_colon_parser(s) + if result.success: + s = result.remaining + else: + return _shared._FAILED_PARSE_RESULT + + value_parse_result = _object_parser(s) + if value_parse_result.success: + s = value_parse_result.remaining + else: + return _shared._FAILED_PARSE_RESULT + + return _shared.ParseResult( + success = True, + value = (key_parse_result.value, value_parse_result.value), + remaining = s, + ) + +_dictionary_parser = _comma_separate_and_wrap(_kvp_parser, '{', '}', collections.OrderedDict) + + +_PARSERS = [ + _make_constant_parser('null', None), + _make_constant_parser('true', True), + _make_constant_parser('false', False), + _make_integer_parser(8), + _make_integer_parser(16), + _make_integer_parser(32), + _make_integer_parser(64), + _binary_parser, + _make_utf_parser('utf8'), + _make_utf_parser('utf16'), + _make_utf_parser('utf32'), + _list_parser, + _dictionary_parser, +] + +def _parse(parser, source): + result = parser(source) + + if result.success: + if result.remaining.strip() == '': + return result.value + + raise Exception('Unparsed trailing characters: "{}"'.format(result.remaining)) + + raise Exception('Unable to parse: "{}"'.format(source)) + +def deserialize(s): + return _parse(_object_parser, s) diff --git a/ton/tags.py b/ton/tags.py new file mode 100644 index 0000000..db54dfe --- /dev/null +++ b/ton/tags.py @@ -0,0 +1,138 @@ +import collections + +VOID = 0x00 +TRUE = 0x01 +FALSE = 0x02 +BOOL = (TRUE, FALSE) +INT8 = 0x10 +INT16 = 0x11 +INT32 = 0x12 +INT64 = 0x13 +# These are to be supported in the future +# FLOAT = 0x20 +# DOUBLE = 0x21 +BINARY = 0x30 +UTF8 = 0x31 +UTF16 = 0x32 +UTF32 = 0x33 +LIST = 0x40 +DICTIONARY = 0x41 + +STRING_TAGS = set([UTF8, UTF16, UTF32]) + +DEFAULT_INTEGER_ENCODING = INT32 +DEFAULT_STRING_ENCODING = UTF8 + +TaggedObject = collections.namedtuple('TaggedObject', ['tag', 'value']) + +_TYPES_TO_TAGS = { + int: DEFAULT_INTEGER_ENCODING, + bytes: BINARY, + str: DEFAULT_STRING_ENCODING, + list: LIST, + dict: DICTIONARY, + collections.OrderedDict: DICTIONARY, +} + +def _tag(o): + if isinstance(o, TaggedObject): + return o + + if o is None: + return TaggedObject(tag = VOID, value = o) + + if o is True: + return TaggedObject(tag = TRUE, value = o) + + if o is False: + return TaggedObject(tag = FALSE, value = o) + + return TaggedObject(tag = _TYPES_TO_TAGS[type(o)], value = o) + +_NONE = TaggedObject(tag = VOID, value = None) +_TRUE = TaggedObject(tag = TRUE, value = True) +_FALSE = TaggedObject(tag = FALSE, value = False) + +_TAGS_TO_IN_RANGE_PREDICATES = collections.OrderedDict([ + (INT8, lambda i: -128 <= i and i <= 127), + (INT16, lambda i: -32768 <= i and i <= 32767), + (INT32, lambda i: -2147483648 <= i and i <= 2147483647), + (INT64, lambda i: -9223372036854775808 <= i and i <= 9223372036854775807), +]) + +class TooWideError(Exception): + pass + +SMALLEST = object() + +def autotag(o, **kwargs): + preferred_integer_tag = kwargs.pop('preferred_integer_tag', DEFAULT_INTEGER_ENCODING) + preferred_string_tag = kwargs.pop('preferred_string_tag', DEFAULT_STRING_ENCODING) + + if kwargs: + raise TypeError("autotag() got an unexpected keyword argument '{}'".format( + list(kwargs.keys())[0], + )) + + if isinstance(o, TaggedObject): + return o + + if o is None: + return _NONE + + if o is True: + return _TRUE + + if o is False: + return _FALSE + + if isinstance(o, int): + if preferred_integer_tag is not SMALLEST and _TAGS_TO_IN_RANGE_PREDICATES[preferred_integer_tag](o): + return TaggedObject(tag = preferred_integer_tag, value = o) + + else: + for tag, in_range_predicate in _TAGS_TO_IN_RANGE_PREDICATES.items(): + if in_range_predicate(o): + return TaggedObject(tag = tag, value = o) + + raise TooWideError("Integer {} is too wide to be serialized") + + if isinstance(o, str): + # TODO Support SMALLEST for preferred string tag + return TaggedObject(tag = preferred_string_tag, value = o) + + if isinstance(o, bytes): + return TaggedObject(tag = BINARY, value = o) + + if isinstance(o, list): + return TaggedObject( + tag = LIST, + value = [ + autotag( + i, + preferred_integer_tag = preferred_integer_tag, + preferred_string_tag = preferred_string_tag, + ) for i in o + ], + ) + + if isinstance(o, dict): + return TaggedObject( + tag = DICTIONARY, + value = collections.OrderedDict([ + ( + autotag( + key, + preferred_integer_tag = preferred_integer_tag, + preferred_string_tag = preferred_string_tag, + ), + autotag( + value, + preferred_integer_tag = preferred_integer_tag, + preferred_string_tag = preferred_string_tag, + ), + ) for key, value in o.items() + ]), + ) + + raise Exception('Unsupported type {}'.format(type(o)))