--- /dev/null
+The following needs doing before a 1.0 release:
+1. String serialization settings (defaults):
+ a. Width (80)
+ b. Maintain tags for default types (true)
+ c. Handling of non-displayable and non-ascii characters
+2. String deserialization settings (defaults):
+ a. Default string type (utf-8)
+ b. Default integer type (int32)
+ c. Default decimal type (double)
+ d. Maintain tags universally, not at all, or only for non-default types (universally)
+3. Binary serialization settings (defaults):
+ a. Default string type (utf-8)
+ b. Default integer type (int32)
+ c. Default decimal type (double)
+4. Binary deserialization settings (defaults):
+ a. Maintain tags universally, not at all, or only for non-default types (universally)
+ b. Return lists as iterables or as lists (lists)
+ c. Return dictionaries as OrderedDicts, dicts, lists of key-value pairs, or iterables of key-value pairs (OrderedDict)
+5. Add unsigned integer types.
+6. Think about type signatures for string serialization (do we want 1i32 or 1int32?).
+7. Include the string encoding byte for dictionary keys.
+ a. It adds only 1 byte per key.
+ b. It prevents excessively long keys in utf16 friendly languages.
+8. Consider limiting key string length to 2^16 or even 2^8:
+ a. It saves 2 or 3 bytes per key, and keys are typically short.
+ b. Perhaps we should create a struct type with this feature.
+9. Add a rationale doc that includes rationale for the technical decisions made.
+10. Binary deserialization needs tooling for lazy deserialization and deserializing from buffers.
+11. String escaping in string serialization and deserialization.
+12.
+import binascii
import collections
import struct
collections.OrderedDict: DICTIONARY,
}
def _tag(o):
    """Return *o* as a TaggedObject.

    A value that is already a TaggedObject is returned unchanged. Any other
    value is wrapped using the tag registered for its exact type in
    _TYPES_TO_TAGS; a type with no entry there raises KeyError.
    """
    if not isinstance(o, TaggedObject):
        o = TaggedObject(tag=_TYPES_TO_TAGS[type(o)], value=o)
    return o
-def serialize_tag_only_type(o):
+def _binary_serialize_tag_only_type(o):
return b''
-def make_serializer_from_pack_format_string(pfs):
+def _pack_format_string_to_binary_serializer(pfs):
def serializer(i):
return struct.pack(pfs, i)
return serializer
-def make_string_serializer_from_encoder(e):
+def _encoder_to_binary_serializer(e):
def serializer(s):
encoded = e(s)
return struct.pack('!I', len(encoded)) + encoded
return serializer
def _binary_serialize_list(items):
    """Serialize a homogeneous list to its binary payload.

    Layout (big-endian): 1-byte item tag, 4-byte payload length in bytes,
    4-byte item count, then the concatenated item payloads. An empty list
    is written with the VOID item tag.

    Raises:
        TypeError: if the items do not all carry the same tag.
    """
    tagged = [_tag(i) for i in items]
    item_tag = tagged[0].tag if tagged else VOID

    # The layout stores a single tag for the whole list, so a mixed list
    # cannot be represented; refuse it rather than silently writing every
    # item with the first item's serializer.
    for index, item in enumerate(tagged):
        if item.tag != item_tag:
            raise TypeError(
                'list items must all have the same tag: item 0 has tag {!r} '
                'but item {} has tag {!r}'.format(item_tag, index, item.tag)
            )

    item_serializer = _BINARY_SERIALIZERS[item_tag]
    payload = b''.join(item_serializer(item.value) for item in tagged)
    return struct.pack('!BII', item_tag, len(payload), len(tagged)) + payload
-def serialize_dict(d):
+def _binary_serialize_dict(d):
item_length = 0
serialized = b''
- key_serializer = _SERIALIZERS[UTF8]
+ key_serializer = _BINARY_SERIALIZERS[UTF8]
for key, value in d.items():
assert isinstance(key, str)
byte_length = len(serialized)
return struct.pack('!II', byte_length, item_length) + serialized
# Maps each type tag to the function that produces the binary payload for a
# value carrying that tag. The tag byte itself is written by the caller.
_BINARY_SERIALIZERS = {
    VOID: _binary_serialize_tag_only_type,
    TRUE: _binary_serialize_tag_only_type,
    FALSE: _binary_serialize_tag_only_type,
    INT8: _pack_format_string_to_binary_serializer('!b'),
    INT16: _pack_format_string_to_binary_serializer('!h'),
    INT32: _pack_format_string_to_binary_serializer('!i'),
    # INT64 is accepted by the string serializer, so it needs a binary
    # serializer too; '!q' is the 8-byte big-endian signed integer format.
    INT64: _pack_format_string_to_binary_serializer('!q'),
    FLOAT: _pack_format_string_to_binary_serializer('!f'),
    DOUBLE: _pack_format_string_to_binary_serializer('!d'),
    BINARY: _encoder_to_binary_serializer(lambda b: b),
    UTF8: _encoder_to_binary_serializer(lambda s: s.encode('utf-8')),
    UTF16: _encoder_to_binary_serializer(lambda s: s.encode('utf-16')),
    UTF32: _encoder_to_binary_serializer(lambda s: s.encode('utf-32')),
    LIST: _binary_serialize_list,
    DICTIONARY: _binary_serialize_dict,
}
def _binary_serialize(o):
    """Serialize *o* as a 1-byte tag followed by its tag-specific payload."""
    tagged = _tag(o)
    tag_byte = struct.pack('!B', tagged.tag)
    payload = _BINARY_SERIALIZERS[tagged.tag](tagged.value)
    return tag_byte + payload
ParseResult = collections.namedtuple(
'ParseResult',
return string_parser
-def list_parser(source):
+def _list_parser(source):
tag = source[0]
parser = _TAGS_TO_PARSERS[tag]
UTF8: make_string_parser(lambda b : b.decode('utf-8')),
UTF16: make_string_parser(lambda b : b.decode('utf-16')),
UTF32: make_string_parser(lambda b : b.decode('utf-32')),
- LIST: list_parser,
+ LIST: _list_parser,
DICTIONARY: dictionary_parser,
}
def _binary_deserialize(b):
    """Deserialize the binary input *b* by running _object_parser through _parse."""
    parsed = _parse(_object_parser, b)
    return parsed
+def _integer_size_to_string_serializer(integer_size):
+ minimum = -(2 ** (integer_size - 1))
+ maximum = 2 ** (integer_size - 1) - 1
+
+ def serializer(integer):
+ assert minimum <= integer and integer <= maximum
+ return '{}i{}'.format(integer, integer_size)
+
+ return serializer
+
+def _serialize_float(f):
+ return '{}f'.format(f)
+
+def _serialize_double(d):
+ return '{}d'.format(d)
+
+def _serialize_binary(b):
+ return '"{}"b'.format(binascii.hexlify(b).decode('ascii'))
+
+def _utf_encoding_to_serializer(utf_encoding):
+ def serializer(s):
+ return '"{}"{}'.format(s, utf_encoding)
+
+ return serializer
+
def _string_serialize_list(l):
    """Render *l* as ``[item, item, ...]`` with each item string-serialized."""
    rendered_items = [_string_serialize(item) for item in l]
    return '[' + ', '.join(rendered_items) + ']'
+
def _string_serialize_dictionary(d):
    """Render *d* as ``{ key: value, ... }`` with keys and values string-serialized.

    Pairs appear in the iteration order of ``d.items()``.
    """
    rendered_pairs = (
        _string_serialize(key) + ': ' + _string_serialize(value)
        for key, value in d.items()
    )
    return '{ ' + ', '.join(rendered_pairs) + ' }'
+
# Maps each type tag to the function that renders a value carrying that tag
# in the string serialization format.
_STRING_SERIALIZERS = {
    # Tag-only types: the value is ignored and a fixed keyword is emitted.
    VOID: lambda _value: 'null',
    TRUE: lambda _value: 'true',
    FALSE: lambda _value: 'false',
    # Signed integers, suffixed with their bit width.
    INT8: _integer_size_to_string_serializer(8),
    INT16: _integer_size_to_string_serializer(16),
    INT32: _integer_size_to_string_serializer(32),
    INT64: _integer_size_to_string_serializer(64),
    # Decimal types.
    FLOAT: _serialize_float,
    DOUBLE: _serialize_double,
    # Quoted types: hex-encoded binary and encoding-suffixed text.
    BINARY: _serialize_binary,
    UTF8: _utf_encoding_to_serializer('utf8'),
    UTF16: _utf_encoding_to_serializer('utf16'),
    UTF32: _utf_encoding_to_serializer('utf32'),
    # Containers.
    LIST: _string_serialize_list,
    DICTIONARY: _string_serialize_dictionary,
}
+
def _string_serialize(o):
    """Return the string serialization of *o*.

    The value is tagged first, then rendered by the serializer registered
    for its tag in _STRING_SERIALIZERS.
    """
    tagged = _tag(o)
    render = _STRING_SERIALIZERS[tagged.tag]
    return render(tagged.value)
+
+def _string_deserialize(o):
+ pass
+
Serializer = collections.namedtuple('Serializer', ['serialize', 'deserialize'])
binary = Serializer(
deserialize = _binary_deserialize,
)
# Public entry point pairing the string serializer with its deserializer.
string = Serializer(serialize=_string_serialize, deserialize=_string_deserialize)
+
def binary_to_string(b):
    """Convert the binary serialization *b* into its string serialization."""
    deserialized = binary.deserialize(b)
    return string.serialize(deserialized)
import collections
import unittest
-import don
-from don import binary
-from don import string
+from don import *
class TestBinarySerialize(unittest.TestCase):
def test_serializes_null(self):
class TestStringSerialize(unittest.TestCase):
    """Checks the exact text produced by ``string.serialize`` for each tag."""

    def check(self, value, expected):
        """Assert that *value* serializes to exactly *expected*."""
        self.assertEqual(string.serialize(value), expected)

    def check_tagged(self, tag, cases):
        """Run ``check`` for each (raw value, expected text) pair under *tag*."""
        for raw, expected in cases:
            self.check(TaggedObject(tag, raw), expected)

    def test_serializes_null(self):
        self.check(None, 'null')

    def test_serializes_true(self):
        self.check(True, 'true')

    def test_serializes_false(self):
        self.check(False, 'false')

    def test_serializes_int8(self):
        self.check_tagged(INT8, [(1, '1i8'), (-1, '-1i8'), (42, '42i8')])

    def test_serializes_int16(self):
        self.check_tagged(INT16, [(1, '1i16'), (-1, '-1i16'), (42, '42i16')])

    def test_serializes_int32(self):
        self.check_tagged(INT32, [(1, '1i32'), (-1, '-1i32'), (42, '42i32')])

    def test_serializes_int64(self):
        self.check_tagged(INT64, [(1, '1i64'), (-1, '-1i64'), (42, '42i64')])

    def test_serializes_float(self):
        self.check_tagged(FLOAT, [(1.0, '1.0f')])

    def test_serializes_double(self):
        self.check_tagged(DOUBLE, [(1.0, '1.0d')])

    def test_serializes_binary(self):
        self.check_tagged(BINARY, [(b'\xde\xad\xbe\xef', '"deadbeef"b')])

    def test_serializes_utf8(self):
        self.check_tagged(UTF8, [('Hello, world', '"Hello, world"utf8')])

    def test_serializes_utf16(self):
        self.check_tagged(UTF16, [('Hello, world', '"Hello, world"utf16')])

    def test_serializes_utf32(self):
        self.check_tagged(UTF32, [('Hello, world', '"Hello, world"utf32')])

    def test_serializes_list(self):
        self.check_tagged(LIST, [([1, 2, 3], '[1i32, 2i32, 3i32]')])

    def test_serializes_dictionary(self):
        # OrderedDict pins the key order the expected text relies on.
        ordered = collections.OrderedDict()
        ordered['foo'] = 1
        ordered['bar'] = 'baz'
        self.check(
            TaggedObject(DICTIONARY, ordered),
            '{ "foo"utf8: 1i32, "bar"utf8: "baz"utf8 }',
        )
class TestStringDeserialize(unittest.TestCase):
    """Placeholder: no string deserialization tests are defined yet."""