Add a string serializer
[ton] / don / __init__.py
1 import binascii
2 import collections
3 import struct
4
# Type tags. _binary_serialize writes the tag as a single unsigned byte in
# front of each serialized object.

# Values represented by the tag alone.
VOID, TRUE, FALSE = 0x00, 0x01, 0x02
BOOL = (TRUE, FALSE)

# Integers.
INT8, INT16, INT32, INT64 = 0x10, 0x11, 0x12, 0x13

# Floating point numbers.
FLOAT, DOUBLE = 0x20, 0x21

# Raw bytes and text in the named encodings.
BINARY, UTF8, UTF16, UTF32 = 0x30, 0x31, 0x32, 0x33

# Containers.
LIST, DICTIONARY = 0x40, 0x41

# Tags used for plain Python values that were not explicitly tagged.
DEFAULT_INTEGER_ENCODING = INT32
DEFAULT_DECIMAL_ENCODING = DOUBLE
DEFAULT_STRING_ENCODING = UTF8
25
# A value paired with the type tag it should be serialized under.
TaggedObject = collections.namedtuple('TaggedObject', 'tag value')
27
# Tag applied to a plain Python value that the caller did not wrap in a
# TaggedObject. _tag also resolves subclasses of these types to the same tag.
_TYPES_TO_TAGS = {
    int: DEFAULT_INTEGER_ENCODING,
    float: DEFAULT_DECIMAL_ENCODING,
    bytes: BINARY,
    str: DEFAULT_STRING_ENCODING,
    list: LIST,
    dict: DICTIONARY,
    collections.OrderedDict: DICTIONARY,
}


def _tag(o):
    """Return *o* wrapped in a TaggedObject carrying its type tag.

    A value that is already a TaggedObject is returned unchanged. None, True
    and False map to VOID, TRUE and FALSE. Any other value is tagged according
    to _TYPES_TO_TAGS, using the first class in the value's method resolution
    order that has an entry, so subclasses of supported types are accepted.

    Raises:
        KeyError: if no class in the value's MRO has an entry in
            _TYPES_TO_TAGS. The offending type is the exception argument.
    """
    if isinstance(o, TaggedObject):
        return o

    if o is None:
        return TaggedObject(tag=VOID, value=o)

    # Identity checks keep the booleans out of the int entry (bool is a
    # subclass of int).
    if o is True:
        return TaggedObject(tag=TRUE, value=o)

    if o is False:
        return TaggedObject(tag=FALSE, value=o)

    # An exact-type lookup would reject subclasses such as defaultdict or
    # IntEnum members; the MRO starts with type(o) itself, so exact matches
    # still win.
    for candidate in type(o).__mro__:
        if candidate in _TYPES_TO_TAGS:
            return TaggedObject(tag=_TYPES_TO_TAGS[candidate], value=o)

    raise KeyError(type(o))
52
def _binary_serialize_tag_only_type(o):
    """Return an empty payload; the argument is ignored."""
    return bytes()
55
def _pack_format_string_to_binary_serializer(pfs):
    """Build a one-argument serializer that packs its argument with *pfs*.

    *pfs* is a ``struct`` format string; it is only interpreted when the
    returned serializer is called.
    """
    return lambda i: struct.pack(pfs, i)
60
def _encoder_to_binary_serializer(e):
    """Build a serializer that emits ``e(value)`` behind a length prefix.

    The prefix is the encoded payload's length packed as a 4-byte unsigned
    integer in network byte order.
    """
    def length_prefixed(s):
        payload = e(s)
        prefix = struct.pack('!I', len(payload))
        return prefix + payload

    return length_prefixed
66
def _binary_serialize_list(items):
    """Serialize *items* as a header followed by the untagged item payloads.

    The header holds the item tag (one byte), the payload size in bytes and
    the number of items. The tag of the first item is used for every item; an
    empty list is written with the VOID tag.
    """
    # TODO Enforce that items are all the same type
    tagged_items = list(map(_tag, items))
    item_tag = tagged_items[0].tag if tagged_items else VOID

    serialize_item = _BINARY_SERIALIZERS[item_tag]
    payload = b''.join(serialize_item(tagged.value) for tagged in tagged_items)

    header = struct.pack('!BII', item_tag, len(payload), len(tagged_items))
    return header + payload
82
def _binary_serialize_dict(d):
    """Serialize the mapping *d* as a header followed by its entries.

    The header holds the payload size in bytes and the number of entries,
    each as a 4-byte unsigned integer. Every entry is the key written as a
    length-prefixed UTF-8 string, followed by the value as a fully tagged
    object.

    Raises:
        TypeError: if a key is not a ``str``.
    """
    key_serializer = _BINARY_SERIALIZERS[UTF8]

    entries = []
    for key, value in d.items():
        # Raise explicitly: an assert would vanish under ``python -O``.
        if not isinstance(key, str):
            raise TypeError(
                'Dictionary keys must be str, got {}'.format(type(key).__name__)
            )
        entries.append(key_serializer(key) + _binary_serialize(value))

    # Join once instead of growing a bytes object entry by entry.
    serialized = b''.join(entries)
    return struct.pack('!II', len(serialized), len(entries)) + serialized
96
# Payload serializer for each tag. The tag byte itself is written by
# _binary_serialize, not by these functions.
_BINARY_SERIALIZERS = {
    VOID: _binary_serialize_tag_only_type,
    TRUE: _binary_serialize_tag_only_type,
    FALSE: _binary_serialize_tag_only_type,
    INT8: _pack_format_string_to_binary_serializer('!b'),
    INT16: _pack_format_string_to_binary_serializer('!h'),
    INT32: _pack_format_string_to_binary_serializer('!i'),
    # INT64 had no serializer although the parser table reads it with '!q'.
    INT64: _pack_format_string_to_binary_serializer('!q'),
    FLOAT: _pack_format_string_to_binary_serializer('!f'),
    DOUBLE: _pack_format_string_to_binary_serializer('!d'),
    BINARY: _encoder_to_binary_serializer(lambda b: b),
    UTF8: _encoder_to_binary_serializer(lambda s: s.encode('utf-8')),
    UTF16: _encoder_to_binary_serializer(lambda s: s.encode('utf-16')),
    UTF32: _encoder_to_binary_serializer(lambda s: s.encode('utf-32')),
    LIST: _binary_serialize_list,
    DICTIONARY: _binary_serialize_dict,
}
113
def _binary_serialize(o):
    """Serialize *o* as its one-byte tag followed by the tag's payload."""
    tagged = _tag(o)
    tag_byte = struct.pack('!B', tagged.tag)
    serialize_payload = _BINARY_SERIALIZERS[tagged.tag]
    return tag_byte + serialize_payload(tagged.value)
117
# Outcome of running a parser: whether it succeeded, the parsed value, and the
# input left over after the parsed portion.
ParseResult = collections.namedtuple('ParseResult', 'success value remaining')

# Shared result for a parse that did not succeed.
_FAILED_PARSE_RESULT = ParseResult(False, None, None)
128
# struct format used to read an integer of the given size in bytes.
_BYTE_SIZES_TO_UNPACK_FORMATS = {1: '!b', 2: '!h', 4: '!i', 8: '!q'}
135
def make_integer_parser(size_in_bytes):
    """Build a parser that reads one integer of *size_in_bytes* bytes.

    The struct format is looked up in _BYTE_SIZES_TO_UNPACK_FORMATS when the
    parser is built, so an unsupported size raises KeyError here rather than
    at parse time.
    """
    fmt = _BYTE_SIZES_TO_UNPACK_FORMATS[size_in_bytes]

    def integer_parser(source):
        (number,) = struct.unpack(fmt, source[:size_in_bytes])
        rest = source[size_in_bytes:]
        return ParseResult(True, number, rest)

    return integer_parser
146
def binary64_parser(source):
    """Parse the first eight bytes of *source* with the ``'!d'`` format."""
    (number,) = struct.unpack('!d', source[:8])
    return ParseResult(True, number, source[8:])
153
def make_string_parser(decoder):
    """Build a parser for a length-prefixed payload.

    The returned parser reads a 4-byte unsigned length, passes that many of
    the following bytes to *decoder*, and reports the rest as remaining.
    """
    def string_parser(source):
        (length,) = struct.unpack('!I', source[:4])
        payload_end = 4 + length
        return ParseResult(
            True,
            decoder(source[4:payload_end]),
            source[payload_end:],
        )

    return string_parser
165
def _list_parser(source):
    """Parse a serialized list and return its items as a Python list.

    *source* starts with the item tag (one byte), followed by the payload
    size in bytes and the item count (4-byte unsigned integers each), then
    the untagged item payloads.

    The items are parsed eagerly, exactly ``items_length`` times. Driving the
    loop by the declared count rather than by the remaining bytes lets
    zero-width items (VOID, TRUE, FALSE) round-trip and cannot spin forever on
    an item that fails to parse. Returning a list rather than a generator
    lets the result be passed straight back to a serializer.

    NOTE: the item count comes from the input; for zero-width item types a
    short input can declare a very large list. Bound it before parsing
    untrusted data.

    Raises:
        ValueError: if the payload is shorter than declared, an item fails to
            parse, or bytes are left over after the declared number of items.
    """
    tag = source[0]
    parser = _TAGS_TO_PARSERS[tag]

    byte_length, items_length = struct.unpack('!II', source[1:9])
    payload = source[9:9 + byte_length]
    remaining = source[9 + byte_length:]

    if len(payload) != byte_length:
        raise ValueError(
            'List payload truncated: expected {} bytes, found {}'.format(
                byte_length, len(payload)
            )
        )

    items = []
    for index in range(items_length):
        parse_result = parser(payload)

        if not parse_result.success:
            raise ValueError('Failed to parse list item {}'.format(index))

        items.append(parse_result.value)
        payload = parse_result.remaining

    if len(payload) > 0:
        raise ValueError(
            'List payload has {} bytes left after {} items'.format(
                len(payload), items_length
            )
        )

    return ParseResult(success=True, value=items, remaining=remaining)
195
def dictionary_parser(source):
    """Parse a serialized dictionary into an OrderedDict.

    *source* starts with the payload size in bytes and the entry count
    (4-byte unsigned integers each). The payload is a sequence of entries,
    each a UTF-8 key followed by a tagged value; entries are read until the
    payload is exhausted and the number read is then checked against the
    declared count.
    """
    key_parser = _TAGS_TO_PARSERS[UTF8]

    byte_length, item_length = struct.unpack('!II', source[:8])
    payload_end = 8 + byte_length
    payload = source[8:payload_end]
    remaining = source[payload_end:]

    entries = collections.OrderedDict()
    parsed = 0

    while len(payload) > 0:
        parsed += 1
        key_result = key_parser(payload)
        value_result = _object_parser(key_result.remaining)
        entries[key_result.value] = value_result.value
        payload = value_result.remaining

    assert parsed == item_length

    return ParseResult(True, entries, remaining)
224
225
def _binary32_parser(source):
    """Parse the first four bytes of *source* with the ``'!f'`` format."""
    return ParseResult(
        success=True,
        value=struct.unpack('!f', source[:4])[0],
        remaining=source[4:],
    )


# Payload parser for each tag. Each parser receives the bytes that follow the
# tag and returns a ParseResult.
_TAGS_TO_PARSERS = {
    VOID: lambda r: ParseResult(True, None, r),
    TRUE: lambda r: ParseResult(True, True, r),
    FALSE: lambda r: ParseResult(True, False, r),
    INT8: make_integer_parser(1),
    INT16: make_integer_parser(2),
    INT32: make_integer_parser(4),
    INT64: make_integer_parser(8),
    # FLOAT is written by the binary serializer ('!f') but had no parser.
    FLOAT: _binary32_parser,
    DOUBLE: binary64_parser,
    BINARY: make_string_parser(lambda b: b),
    UTF8: make_string_parser(lambda b: b.decode('utf-8')),
    UTF16: make_string_parser(lambda b: b.decode('utf-16')),
    UTF32: make_string_parser(lambda b: b.decode('utf-32')),
    LIST: _list_parser,
    DICTIONARY: dictionary_parser,
}
242
def _object_parser(source):
    """Parse one tagged object: a tag byte followed by that tag's payload."""
    payload_parser = _TAGS_TO_PARSERS[source[0]]
    return payload_parser(source[1:])
245
def _parse(parser, source, consume_all=True):
    """Run *parser* over *source* and return the parsed value.

    Args:
        parser: callable taking the source and returning an object with
            ``success``, ``value`` and ``remaining`` attributes.
        source: the input handed to *parser*.
        consume_all: when true (the default), input left over after the
            parsed value is an error; when false, leftover input is ignored.

    Raises:
        Exception: if the parser reports failure, or if ``consume_all`` is
            true and the parser did not consume the whole input.
    """
    result = parser(source)

    # Report a failed parse as such; its ``remaining`` says nothing about
    # trailing bytes.
    if not result.success:
        raise Exception('Parse failed')

    if consume_all and result.remaining != b'':
        raise Exception('Unparsed trailing bytes: {}'.format(result.remaining))

    return result.value
253
def _binary_deserialize(b):
    """Parse *b* as a single tagged object that spans the whole input."""
    return _parse(parser=_object_parser, source=b)
256
def _integer_size_to_string_serializer(integer_size):
    """Build a serializer rendering an integer as ``<value>i<integer_size>``.

    *integer_size* is the width in bits; the serializer asserts that the
    value fits the signed range of that width.
    """
    half_range = 2 ** (integer_size - 1)
    minimum, maximum = -half_range, half_range - 1

    def render(integer):
        assert minimum <= integer <= maximum
        return '{}i{}'.format(integer, integer_size)

    return render
266
def _serialize_float(f):
    """Render *f* followed by the ``f`` suffix."""
    return str.format('{}f', f)
269
def _serialize_double(d):
    """Render *d* followed by the ``d`` suffix."""
    return str.format('{}d', d)
272
def _serialize_binary(b):
    """Render *b* as quoted lowercase hex followed by the ``b`` suffix."""
    hex_digits = binascii.hexlify(b).decode('ascii')
    return '"{}"b'.format(hex_digits)
275
def _utf_encoding_to_serializer(utf_encoding):
    """Build a serializer rendering text as ``"<text>"<utf_encoding>``.

    The text is inserted as-is; quotes inside it are not escaped.
    """
    def render(s):
        return '"{}"{}'.format(s, utf_encoding)

    return render
281
def _string_serialize_list(l):
    """Render *l* as its serialized items, comma-separated, in brackets."""
    rendered_items = ', '.join(_string_serialize(item) for item in l)
    return '[{}]'.format(rendered_items)
284
def _string_serialize_dictionary(d):
    """Render *d* as ``{ key: value, ... }`` with both sides serialized."""
    rendered_pairs = (
        _string_serialize(key) + ': ' + _string_serialize(value)
        for key, value in d.items()
    )
    return '{ ' + ', '.join(rendered_pairs) + ' }'
289
def _fixed_text_serializer(text):
    """Build a serializer that ignores its argument and returns *text*."""
    def render(o):
        return text

    return render


# String serializer for each tag; each takes the untagged value.
_STRING_SERIALIZERS = {
    VOID: _fixed_text_serializer('null'),
    TRUE: _fixed_text_serializer('true'),
    FALSE: _fixed_text_serializer('false'),
    INT8: _integer_size_to_string_serializer(8),
    INT16: _integer_size_to_string_serializer(16),
    INT32: _integer_size_to_string_serializer(32),
    INT64: _integer_size_to_string_serializer(64),
    FLOAT: _serialize_float,
    DOUBLE: _serialize_double,
    BINARY: _serialize_binary,
    UTF8: _utf_encoding_to_serializer('utf8'),
    UTF16: _utf_encoding_to_serializer('utf16'),
    UTF32: _utf_encoding_to_serializer('utf32'),
    LIST: _string_serialize_list,
    DICTIONARY: _string_serialize_dictionary,
}
307
def _string_serialize(o):
    """Render *o* as text using the string serializer for its tag."""
    tagged = _tag(o)
    render = _STRING_SERIALIZERS[tagged.tag]
    return render(tagged.value)
312
def _string_deserialize(o):
    """Parse the string representation back into an object (not implemented).

    Raising keeps callers from mistaking the missing implementation for a
    parsed ``None``: a None result would be serialized by the binary
    serializer as VOID, whatever the input was.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError('String deserialization is not implemented')
315
# A matched pair of functions converting objects to and from one representation.
Serializer = collections.namedtuple('Serializer', 'serialize deserialize')

# Binary representation.
binary = Serializer(_binary_serialize, _binary_deserialize)

# String representation.
string = Serializer(_string_serialize, _string_deserialize)
327
def binary_to_string(b):
    """Convert the binary representation *b* to the string representation."""
    decoded = binary.deserialize(b)
    return string.serialize(decoded)
330
def string_to_binary(s):
    """Convert the string representation *s* to the binary representation."""
    decoded = string.deserialize(s)
    return binary.serialize(decoded)