Parse constants and integers with leading whitespace
[ton] / don / string.py
1 import binascii
2 import collections
3 import functools
4 import re
5
6 from don import tags, _shared
7
def _integer_size_to_string_serializer(integer_size):
    """Build a serializer for signed integers that are ``integer_size`` bits wide.

    The returned callable renders an integer as ``<value>i<integer_size>``
    (for example ``5i8``) after asserting that the value lies within
    ``[-2**(integer_size - 1), 2**(integer_size - 1) - 1]``.
    """
    half_range = 2 ** (integer_size - 1)
    lowest, highest = -half_range, half_range - 1

    def serializer(integer):
        assert lowest <= integer <= highest
        return '{}i{}'.format(integer, integer_size)

    return serializer
17
def _serialize_float(f):
    """Render ``f`` using its default formatting followed by the ``f`` suffix."""
    # format(f) with no spec is what '{}'.format(f) uses internally.
    return format(f) + 'f'
20
def _serialize_double(d):
    """Render ``d`` using its default formatting followed by the ``d`` suffix."""
    # format(d) with no spec is what '{}'.format(d) uses internally.
    return format(d) + 'd'
23
def _serialize_binary(b):
    """Render bytes as hexadecimal digits wrapped as ``"<hex>"b``."""
    hex_digits = binascii.hexlify(b).decode('ascii')
    return '"{}"b'.format(hex_digits)
26
def _utf_encoding_to_serializer(utf_encoding):
    """Build a serializer that renders a string as ``"<text>"<utf_encoding>``.

    The text is inserted verbatim between the quotes; embedded quotes are not
    escaped.
    """
    def serializer(s):
        rendered = '"{}"{}'.format(s, utf_encoding)
        return rendered

    return serializer
32
def _string_serialize_list(l):
    """Render a list as ``[a, b, ...]``, serializing each element with ``serialize``."""
    serialized_items = [serialize(item) for item in l]
    return '[{}]'.format(', '.join(serialized_items))
35
def _string_serialize_dictionary(d):
    """Render a mapping as ``{ key: value, ... }``.

    Keys and values are both rendered with ``serialize``; pairs appear in the
    order produced by ``d.items()``.
    """
    rendered_pairs = [
        serialize(key) + ': ' + serialize(value)
        for key, value in d.items()
    ]
    return '{ ' + ', '.join(rendered_pairs) + ' }'
40
# Dispatch table: tag -> callable that renders a value carrying that tag as
# text.  Entries are added in the same order as the tags are listed below.
_STRING_SERIALIZERS = {
    # The constant tags ignore the value they are handed.
    tags.VOID: lambda _value: 'null',
    tags.TRUE: lambda _value: 'true',
    tags.FALSE: lambda _value: 'false',
}
# Sized integers share one serializer factory, parameterized by bit width.
_STRING_SERIALIZERS.update(
    (tag, _integer_size_to_string_serializer(bits))
    for tag, bits in (
        (tags.INT8, 8),
        (tags.INT16, 16),
        (tags.INT32, 32),
        (tags.INT64, 64),
    )
)
_STRING_SERIALIZERS.update((
    (tags.FLOAT, _serialize_float),
    (tags.DOUBLE, _serialize_double),
    (tags.BINARY, _serialize_binary),
))
# Strings share one serializer factory, parameterized by encoding suffix.
_STRING_SERIALIZERS.update(
    (tag, _utf_encoding_to_serializer(encoding))
    for tag, encoding in (
        (tags.UTF8, 'utf8'),
        (tags.UTF16, 'utf16'),
        (tags.UTF32, 'utf32'),
    )
)
_STRING_SERIALIZERS.update((
    (tags.LIST, _string_serialize_list),
    (tags.DICTIONARY, _string_serialize_dictionary),
))
58
def serialize(o):
    """Serialize ``o`` to its textual representation.

    The object is passed through ``tags._tag`` and the result's ``value`` is
    rendered by the serializer registered for its ``tag``.
    """
    tagged = tags._tag(o)
    render = _STRING_SERIALIZERS[tagged.tag]
    return render(tagged.value)
63
def _consume_leading_whitespace(wrapped_parser):
    """Decorate a parser so leading whitespace is stripped from its input first."""
    @functools.wraps(wrapped_parser)
    def parser(s):
        return wrapped_parser(s.lstrip())

    return parser
71
def _make_constant_parser(constant, value):
    """Build a parser that recognizes the literal text ``constant``.

    After leading whitespace is skipped, input starting with ``constant``
    yields a successful result carrying ``value`` and the text that follows
    the literal; any other input yields the shared failed result.
    """
    consumed = len(constant)

    @_consume_leading_whitespace
    def constant_parser(s):
        if not s.startswith(constant):
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=value,
            remaining=s[consumed:],
        )

    return constant_parser
86
def _make_integer_parser(width):
    """Build a parser for integer literals of the form ``<digits>i<width>``.

    Leading whitespace is skipped.  Input that does not look like an integer
    literal of this width yields the shared failed result, so that other
    parsers can be tried.

    Raises:
        ValueError: if the literal is well formed but its value does not fit
            in a signed integer of ``width`` bits.  This mirrors the range
            that the serializer for the same width accepts.
    """
    matcher = re.compile(r'(-?\d+)i' + str(width))
    minimum = -(2 ** (width - 1))
    maximum = 2 ** (width - 1) - 1

    @_consume_leading_whitespace
    def integer_parser(s):
        match = matcher.match(s)

        if not match:
            return _shared._FAILED_PARSE_RESULT

        value = int(match.group(1))

        # A literal that names this width but overflows it is malformed
        # input, not a candidate for a different parser, so report it loudly.
        if not minimum <= value <= maximum:
            raise ValueError(
                'Integer {} is out of range for i{} (expected {} to {})'.format(
                    value, width, minimum, maximum,
                )
            )

        return _shared.ParseResult(
            success=True,
            value=value,
            remaining=s[match.end():],
        )

    return integer_parser
105
# The float/double serializers emit Python's default formatting of the value
# followed by ``f`` or ``d``.  That formatting is not always ``digits.digits``:
# it can omit the fractional part (``1e+16``), carry an exponent (``1e-05``),
# or be ``inf`` / ``nan``.  Accept all of those so serialized values can be
# parsed back; ``float()`` understands every form captured by the group.
_FLOAT_LITERAL = r'(-?(?:\d+(?:\.\d+)?(?:[eE][-+]?\d+)?|inf|nan))'
_BINARY32_MATCHER = re.compile(_FLOAT_LITERAL + 'f')
_BINARY64_MATCHER = re.compile(_FLOAT_LITERAL + 'd')
108
def _binary32_parser(s):
    """Parse a float literal recognized by ``_BINARY32_MATCHER`` at the start of ``s``.

    On a match, returns a successful result whose value is the captured text
    converted with ``float`` and whose remainder is the text after the match;
    otherwise returns the shared failed result.
    """
    found = _BINARY32_MATCHER.match(s)

    if found is None:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the float is in range
    number_text = found.group(1)
    return _shared.ParseResult(
        success=True,
        value=float(number_text),
        remaining=s[found.end():],
    )
121
def _binary64_parser(s):
    """Parse a double literal recognized by ``_BINARY64_MATCHER`` at the start of ``s``.

    On a match, returns a successful result whose value is the captured text
    converted with ``float`` and whose remainder is the text after the match;
    otherwise returns the shared failed result.
    """
    found = _BINARY64_MATCHER.match(s)

    if found is None:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the double is in range
    number_text = found.group(1)
    return _shared.ParseResult(
        success=True,
        value=float(number_text),
        remaining=s[found.end():],
    )
134
# Binary literals look like "<hex digits>"b; the digits are captured in group 1.
_BINARY_MATCHER = re.compile(r'"([\da-f]*)"b')


def _binary_parser(s):
    """Parse a binary literal at the start of ``s``.

    On a match, returns a successful result whose value is the captured hex
    text decoded with ``binascii.unhexlify`` and whose remainder is the text
    after the literal; otherwise returns the shared failed result.
    """
    found = _BINARY_MATCHER.match(s)

    if found is None:
        return _shared._FAILED_PARSE_RESULT

    hex_digits = found.group(1)
    return _shared.ParseResult(
        success=True,
        value=binascii.unhexlify(hex_digits),
        remaining=s[found.end():],
    )
148
def _make_utf_parser(encoding):
    """Build a parser for string literals of the form ``"<text>"<encoding>``.

    On a match at the start of the input, the returned parser yields a
    successful result whose value is the text between the quotes and whose
    remainder is everything after the encoding suffix; otherwise it yields
    the shared failed result.

    NOTE(review): the text is matched lazily up to the first ``"`` that is
    followed by ``encoding``, and there is no escape syntax, so text that
    itself contains that sequence cannot be represented.
    """
    # DOTALL lets the text span several lines: the string serializer writes
    # its input verbatim, so a value containing a newline produces a
    # multi-line literal that must still be readable here.
    matcher = re.compile(r'"(.*?)"' + encoding, re.DOTALL)

    def parser(s):
        match = matcher.match(s)

        if not match:
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=match.group(1),
            remaining=s[match.end():],
        )

    return parser
165
def _prefix_with_comma(parser):
    """Wrap ``parser`` so that it only applies after a leading comma.

    The wrapped parser returns the shared failed result when the input does
    not start with ``,``.  When it does, the comma is dropped and ``parser``
    is run on the rest; if that fails, the comma had nothing valid after it
    and an ``Exception`` is raised.
    """
    def wrapped(s):
        if not s.startswith(','):
            return _shared._FAILED_PARSE_RESULT

        after_comma = s[1:]
        result = parser(after_comma)

        if result.success:
            return result

        raise Exception('Trailing comma before "{}"'.format(after_comma))

    return wrapped
180
def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster):
    """Build a parser for a delimited, comma-separated sequence of items.

    Args:
        wrapped_parser: parser used for each individual item.
        start_wrap: text that opens the sequence (for example ``[``).
        end_wrap: text that closes the sequence (for example ``]``).
        typecaster: callable applied to the list of parsed item values to
            produce the final value.

    Returns:
        A parser that yields a successful result holding ``typecaster(items)``
        and the text after ``end_wrap``, or the shared failed result when the
        input does not start with ``start_wrap`` or is not closed by
        ``end_wrap``.  An ``Exception`` propagates from the comma handling
        when a comma is not followed by a valid item.
    """
    parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser)

    def parser(s):
        s = s.lstrip()

        if not s.startswith(start_wrap):
            return _shared._FAILED_PARSE_RESULT

        # Skip the whole delimiter rather than assuming it is one character.
        s = s[len(start_wrap):]

        values = []
        parse_result = wrapped_parser(s)

        while parse_result.success:
            values.append(parse_result.value)
            # Whitespace may sit between an item and the following comma or
            # closing delimiter (the dictionary serializer writes " }").
            s = parse_result.remaining.lstrip()
            parse_result = parser_prefixed_with_comma(s)

        # Also covers an empty sequence whose delimiters are separated only
        # by whitespace, where the loop above never runs.
        s = s.lstrip()

        if not s.startswith(end_wrap):
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=typecaster(values),
            remaining=s[len(end_wrap):],
        )

    return parser
212
# _PARSERS is defined further down the module.  It is only looked up when
# _object_parser is called, so the function can be defined here and used in
# the definition of _list_parser.
def _object_parser(source):
    """Parse a single object of any supported kind from the start of ``source``.

    Leading whitespace is skipped once, then each parser in ``_PARSERS`` is
    tried in order and the first successful result is returned.  If none
    succeeds, the shared failed result is returned.
    """
    # Strip here rather than relying on each parser to do so: the list and
    # dictionary serializers put a space after "," and ":", and not every
    # parser in _PARSERS skips whitespace on its own.
    source = source.lstrip()

    for parser in _PARSERS:
        result = parser(source)

        if result.success:
            return result

    return _shared._FAILED_PARSE_RESULT
223
# Lists are objects separated by commas between square brackets; the parsed
# items are collected into a plain list.
_list_parser = _comma_separate_and_wrap(
    wrapped_parser=_object_parser,
    start_wrap='[',
    end_wrap=']',
    typecaster=list,
)
225
def _kvp_parser(s):
    """Parse a ``key:value`` pair from the start of ``s``.

    Both sides are parsed with ``_object_parser``.  Returns a successful
    result whose value is the ``(key, value)`` tuple, or the shared failed
    result if the key, the ``:`` separator, or the value is missing.
    """
    key_result = _object_parser(s)
    if not key_result.success:
        return _shared._FAILED_PARSE_RESULT

    after_key = key_result.remaining
    if not after_key.startswith(':'):
        return _shared._FAILED_PARSE_RESULT

    value_result = _object_parser(after_key[1:])
    if not value_result.success:
        return _shared._FAILED_PARSE_RESULT

    return _shared.ParseResult(
        success=True,
        value=(key_result.value, value_result.value),
        remaining=value_result.remaining,
    )
249
# Dictionaries are key/value pairs separated by commas between braces; the
# parsed pairs are collected into an OrderedDict.
_dictionary_parser = _comma_separate_and_wrap(
    wrapped_parser=_kvp_parser,
    start_wrap='{',
    end_wrap='}',
    typecaster=collections.OrderedDict,
)
251
252
# Parsers tried, in this order, by _object_parser; the first one that
# succeeds wins.
_PARSERS = (
    [
        _make_constant_parser(text, value)
        for text, value in (('null', None), ('true', True), ('false', False))
    ]
    + [_make_integer_parser(width) for width in (8, 16, 32, 64)]
    + [_binary32_parser, _binary64_parser, _binary_parser]
    + [_make_utf_parser(encoding) for encoding in ('utf8', 'utf16', 'utf32')]
    + [_list_parser, _dictionary_parser]
)
270
def _parse(parser, source):
    """Run ``parser`` over ``source`` and return the parsed value.

    Raises:
        Exception: if ``parser`` does not succeed, or if it succeeds but
            leaves anything other than whitespace unconsumed.
    """
    result = parser(source)

    if not result.success:
        raise Exception('Unable to parse: "{}"'.format(source))

    leftover = result.remaining
    if leftover.strip() != '':
        raise Exception('Unparsed trailing characters: "{}"'.format(leftover))

    return result.value
281
def deserialize(s):
    """Parse the text ``s`` as a single object and return its value.

    Raises:
        Exception: if ``s`` cannot be parsed, or if non-whitespace text
            remains after the object.
    """
    return _parse(parser=_object_parser, source=s)