Implemented dictionary parsing
[ton] / don / string.py
1 import binascii
2 import collections
3 import re
4
5 from don import tags, _shared
6
7 def _integer_size_to_string_serializer(integer_size):
8     minimum = -(2 ** (integer_size - 1))
9     maximum = 2 ** (integer_size - 1) - 1
10     
11     def serializer(integer):
12         assert minimum <= integer and integer <= maximum
13         return '{}i{}'.format(integer, integer_size)
14
15     return serializer
16
17 def _serialize_float(f):
18     return '{}f'.format(f)
19
20 def _serialize_double(d):
21     return '{}d'.format(d)
22
23 def _serialize_binary(b):
24     return '"{}"b'.format(binascii.hexlify(b).decode('ascii'))
25
26 def _utf_encoding_to_serializer(utf_encoding):
27     def serializer(s):
28         return '"{}"{}'.format(s, utf_encoding)
29
30     return serializer
31
def _string_serialize_list(l):
    """Serialize each item with ``serialize`` and join them as ``[a, b, ...]``."""
    rendered_items = [serialize(item) for item in l]
    return '[' + ', '.join(rendered_items) + ']'
34
def _string_serialize_dictionary(d):
    """Serialize a mapping as ``{ key: value, ... }``.

    Keys and values are each rendered with ``serialize``; pairs appear in the
    order yielded by ``d.items()``.
    """
    rendered_pairs = [
        serialize(key) + ': ' + serialize(value)
        for key, value in d.items()
    ]
    return '{ ' + ', '.join(rendered_pairs) + ' }'
39
# Dispatch table used by serialize(): maps a tag to a one-argument function
# that receives the tagged value and returns its string form.
_STRING_SERIALIZERS = {
    # Constants: the value is ignored and a fixed keyword is emitted.
    tags.VOID: lambda o: 'null',
    tags.TRUE: lambda o: 'true',
    tags.FALSE: lambda o: 'false',

    # Signed integers, one serializer per bit width.
    tags.INT8: _integer_size_to_string_serializer(8),
    tags.INT16: _integer_size_to_string_serializer(16),
    tags.INT32: _integer_size_to_string_serializer(32),
    tags.INT64: _integer_size_to_string_serializer(64),

    # Floating point numbers.
    tags.FLOAT: _serialize_float,
    tags.DOUBLE: _serialize_double,

    # Binary data and text.
    tags.BINARY: _serialize_binary,
    tags.UTF8: _utf_encoding_to_serializer('utf8'),
    tags.UTF16: _utf_encoding_to_serializer('utf16'),
    tags.UTF32: _utf_encoding_to_serializer('utf32'),

    # Containers; these call back into serialize() for their contents.
    tags.LIST: _string_serialize_list,
    tags.DICTIONARY: _string_serialize_dictionary,
}
57
def serialize(o):
    """Return the string representation of ``o``.

    ``o`` is first passed through ``tags._tag``; the ``tag`` of the result
    selects an entry of ``_STRING_SERIALIZERS``, which is then called with
    the result's ``value``.
    """
    tagged = tags._tag(o)
    serializer = _STRING_SERIALIZERS[tagged.tag]
    return serializer(tagged.value)
62
def _make_constant_parser(constant, value):
    """Build a parser that recognizes a fixed keyword.

    Args:
        constant: Text the input must start with.
        value: Value reported when the keyword is found.

    Returns:
        A parser taking a string. When the string starts with ``constant`` it
        returns a successful ``ParseResult`` carrying ``value`` and the text
        after the keyword; otherwise it returns the shared failed result.
    """
    def parser(s):
        if not s.startswith(constant):
            return _shared._FAILED_PARSE_RESULT

        rest = s[len(constant):]
        return _shared.ParseResult(success=True, value=value, remaining=rest)

    return parser
76
def _make_integer_parser(width):
    """Build a parser for signed integer literals of a fixed bit width.

    A literal is an optional ``-``, one or more digits, and the suffix
    ``i<width>`` (for example ``-12i16``).

    Args:
        width: Width of the integer type in bits.

    Returns:
        A parser taking a string. It returns a successful ``ParseResult``
        with the integer value and the text after the literal, or the shared
        failed result when the input does not start with such a literal.

    Raises:
        ValueError: (from the returned parser) when the literal is well
            formed but its value does not fit in a signed ``width``-bit
            integer.
    """
    matcher = re.compile(r'(-?\d+)i' + str(width))
    minimum = -(2 ** (width - 1))
    maximum = 2 ** (width - 1) - 1

    def parser(s):
        match = matcher.match(s)
        if not match:
            return _shared._FAILED_PARSE_RESULT

        value = int(match.group(1))

        # The suffix names the type unambiguously, so an out-of-range value
        # is an error in the input rather than a reason to try other parsers.
        if not minimum <= value <= maximum:
            raise ValueError(
                'Integer {} is out of range for i{}'.format(value, width)
            )

        return _shared.ParseResult(
            success=True,
            value=value,
            remaining=s[match.end():],
        )

    return parser
94
95 _BINARY32_MATCHER = re.compile(r'(-?\d+\.\d+)f')
96 _BINARY64_MATCHER = re.compile(r'(-?\d+\.\d+)d')
97
def _binary32_parser(s):
    """Parse a float literal matched by ``_BINARY32_MATCHER`` at the start of ``s``.

    Returns a successful ``ParseResult`` holding ``float()`` of the first
    match group and the text after the match, or the shared failed result
    when the input does not match.
    """
    found = _BINARY32_MATCHER.match(s)
    if found is None:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the float is in range
    number = float(found.group(1))
    rest = s[found.end():]
    return _shared.ParseResult(success=True, value=number, remaining=rest)
110
def _binary64_parser(s):
    """Parse a double literal matched by ``_BINARY64_MATCHER`` at the start of ``s``.

    Returns a successful ``ParseResult`` holding ``float()`` of the first
    match group and the text after the match, or the shared failed result
    when the input does not match.
    """
    found = _BINARY64_MATCHER.match(s)
    if found is None:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the double is in range
    number = float(found.group(1))
    rest = s[found.end():]
    return _shared.ParseResult(success=True, value=number, remaining=rest)
123
# Hex digits are matched in pairs, and only ASCII digits are allowed, so that
# everything captured by group 1 is valid input for binascii.unhexlify.
# Malformed literals (an odd number of digits, non-ASCII digits) then simply
# do not match instead of raising from inside unhexlify.
_BINARY_MATCHER = re.compile(r'"((?:[0-9a-f]{2})*)"b')

def _binary_parser(s):
    """Parse a binary literal of the form ``"<hex digits>"b``.

    Args:
        s: Text to parse; the literal must be at its very start.

    Returns:
        A successful ``ParseResult`` whose value is the decoded bytes and
        whose ``remaining`` is the text after the literal, or the shared
        failed result when ``s`` does not start with a binary literal.
    """
    match = _BINARY_MATCHER.match(s)
    if not match:
        return _shared._FAILED_PARSE_RESULT

    return _shared.ParseResult(
        success=True,
        value=binascii.unhexlify(match.group(1)),
        remaining=s[match.end():],
    )
137
def _make_utf_parser(encoding):
    """Build a parser for quoted text followed by an encoding suffix.

    Args:
        encoding: Suffix expected right after the closing quote, e.g.
            ``utf8``. It is appended to the regular expression as-is.

    Returns:
        A parser taking a string. On a match it returns a successful
        ``ParseResult`` with the text between the quotes and the input after
        the suffix; otherwise it returns the shared failed result.

    Note:
        The quoted text is matched lazily with ``.*?``, so the match extends
        past a ``"`` that is not immediately followed by ``encoding``, and it
        never spans a newline.
    """
    matcher = re.compile(r'"(.*?)"' + encoding)

    def parser(s):
        found = matcher.match(s)
        if found is None:
            return _shared._FAILED_PARSE_RESULT

        text = found.group(1)
        rest = s[found.end():]
        return _shared.ParseResult(success=True, value=text, remaining=rest)

    return parser
154
155 def _prefix_with_comma(parser):
156     def wrapped(s):
157         if s.startswith(','):
158             s = s[1:]
159
160             result = parser(s)
161             if not result.success:
162                 raise Exception('Trailing comma before "{}"'.format(s))
163
164             return result
165
166         return _shared._FAILED_PARSE_RESULT
167
168     return wrapped
169
def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster):
    """Build a parser for a delimited, comma-separated sequence.

    The resulting parser accepts ``start_wrap``, zero or more items
    recognized by ``wrapped_parser`` separated by commas, then ``end_wrap``.
    Whitespace is allowed after ``start_wrap``, around each comma and before
    ``end_wrap``, which covers the ``', '`` separators and the padded
    ``'{ ... }'`` form written by this module's serializers.

    Args:
        wrapped_parser: Parser used for each item.
        start_wrap: Opening delimiter, e.g. ``[``.
        end_wrap: Closing delimiter, e.g. ``]``.
        typecaster: Called with the list of item values to produce the final
            value.

    Returns:
        A parser taking a string. It returns a successful ``ParseResult``
        with ``typecaster(items)`` and the text after ``end_wrap``, or the
        shared failed result when the input is not such a sequence.

    Raises:
        Exception: (from the returned parser, via ``_prefix_with_comma``)
            when a comma is not followed by an item.
    """
    def item_parser(s):
        # Items after a comma may be preceded by whitespace.
        return wrapped_parser(s.lstrip())

    parser_prefixed_with_comma = _prefix_with_comma(item_parser)

    def parser(s):
        if not s.startswith(start_wrap):
            return _shared._FAILED_PARSE_RESULT

        # Skip the delimiter by its real length rather than assuming one
        # character, then any padding that follows it.
        s = s[len(start_wrap):].lstrip()

        value = []
        parse_result = wrapped_parser(s)

        while parse_result.success:
            value.append(parse_result.value)
            # Allow whitespace before the next comma or the closing delimiter.
            s = parse_result.remaining.lstrip()
            parse_result = parser_prefixed_with_comma(s)

        if not s.startswith(end_wrap):
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=typecaster(value),
            remaining=s[len(end_wrap):],
        )

    return parser
201
# _PARSERS is defined further down in this module. It is only looked up when
# _object_parser is called, so the function can be defined here, ahead of the
# container parsers that are built from it.
def _object_parser(source):
    """Parse a single value of any supported kind from the start of ``source``.

    Leading whitespace is skipped first, so values that follow a ``', '`` or
    ``': '`` separator (as written by this module's serializers) are
    recognized. Each parser in ``_PARSERS`` is then tried in order.

    Args:
        source: Text to parse.

    Returns:
        The result of the first parser that succeeds, or the shared failed
        result when none does.
    """
    source = source.lstrip()

    for parser in _PARSERS:
        result = parser(source)

        if result.success:
            return result

    return _shared._FAILED_PARSE_RESULT
212
# Parser for ``[item, item, ...]``; the collected items are returned as a list.
_list_parser = _comma_separate_and_wrap(
    wrapped_parser=_object_parser,
    start_wrap='[',
    end_wrap=']',
    typecaster=list,
)
214
def _kvp_parser(s):
    """Parse a ``key:value`` pair from the start of ``s``.

    Key and value are each parsed with ``_object_parser`` and must be
    separated by a ``:`` directly after the key.

    Returns:
        A successful ``ParseResult`` whose value is the ``(key, value)``
        tuple and whose ``remaining`` is the text after the value, or the
        shared failed result when any of the three parts is missing.
    """
    key_result = _object_parser(s)
    if not key_result.success:
        return _shared._FAILED_PARSE_RESULT

    after_key = key_result.remaining
    if not after_key.startswith(':'):
        return _shared._FAILED_PARSE_RESULT

    value_result = _object_parser(after_key[1:])
    if not value_result.success:
        return _shared._FAILED_PARSE_RESULT

    pair = (key_result.value, value_result.value)
    return _shared.ParseResult(
        success=True,
        value=pair,
        remaining=value_result.remaining,
    )
238
# Parser for ``{key:value, ...}``; the collected (key, value) pairs are turned
# into an OrderedDict.
_dictionary_parser = _comma_separate_and_wrap(
    wrapped_parser=_kvp_parser,
    start_wrap='{',
    end_wrap='}',
    typecaster=collections.OrderedDict,
)
240
241
# Parsers tried, in this order, by _object_parser; the first success wins.
_PARSERS = (
    # Keyword constants.
    [
        _make_constant_parser(keyword, constant)
        for keyword, constant in (('null', None), ('true', True), ('false', False))
    ]
    # Signed integers, one parser per bit width.
    + [_make_integer_parser(width) for width in (8, 16, 32, 64)]
    # Floating point numbers and binary data.
    + [_binary32_parser, _binary64_parser, _binary_parser]
    # Quoted text, one parser per encoding suffix.
    + [_make_utf_parser(encoding) for encoding in ('utf8', 'utf16', 'utf32')]
    # Containers.
    + [_list_parser, _dictionary_parser]
)
259
260 def _parse(parser, source):
261     result = parser(source)
262
263     if result.success:
264         if result.remaining.strip() == '':
265             return result.value
266
267         raise Exception('Unparsed trailing characters: "{}"'.format(result.remaining))
268
269     raise Exception('Unable to parse: "{}"'.format(source))
270
def deserialize(s):
    """Parse the string ``s`` into a value.

    Delegates to ``_parse`` with ``_object_parser``, so an ``Exception`` is
    raised when ``s`` cannot be parsed or has unparsed trailing characters.
    """
    return _parse(parser=_object_parser, source=s)