Parse floats/doubles with leading whitespace
[ton] / don / string.py
1 import binascii
2 import collections
3 import functools
4 import re
5
6 from don import tags, _shared
7
8 def _integer_size_to_string_serializer(integer_size):
9     minimum = -(2 ** (integer_size - 1))
10     maximum = 2 ** (integer_size - 1) - 1
11     
12     def serializer(integer):
13         assert minimum <= integer and integer <= maximum
14         return '{}i{}'.format(integer, integer_size)
15
16     return serializer
17
18 def _serialize_float(f):
19     return '{}f'.format(f)
20
21 def _serialize_double(d):
22     return '{}d'.format(d)
23
24 def _serialize_binary(b):
25     return '"{}"b'.format(binascii.hexlify(b).decode('ascii'))
26
27 def _utf_encoding_to_serializer(utf_encoding):
28     def serializer(s):
29         return '"{}"{}'.format(s, utf_encoding)
30
31     return serializer
32
def _string_serialize_list(l):
    """Render a list as ``[a, b, ...]``, passing each element to ``serialize``."""
    rendered_items = [serialize(item) for item in l]
    template = '[{}]'
    return template.format(', '.join(rendered_items))
35
36 def _string_serialize_dictionary(d):
37     def serialize_kvp(kvp):
38         return serialize(kvp[0]) + ': ' + serialize(kvp[1])
39     return '{ ' + ', '.join(map(serialize_kvp, d.items())) + ' }'
40
# Dispatch table used by ``serialize``: each tag maps to the callable that
# renders a value carrying that tag as text.
_STRING_SERIALIZERS = dict([
    # Constants ignore the value they are handed.
    (tags.VOID, lambda o: 'null'),
    (tags.TRUE, lambda o: 'true'),
    (tags.FALSE, lambda o: 'false'),
    # Signed integers, one serializer per bit width.
    (tags.INT8, _integer_size_to_string_serializer(8)),
    (tags.INT16, _integer_size_to_string_serializer(16)),
    (tags.INT32, _integer_size_to_string_serializer(32)),
    (tags.INT64, _integer_size_to_string_serializer(64)),
    # Floating-point values.
    (tags.FLOAT, _serialize_float),
    (tags.DOUBLE, _serialize_double),
    # Binary data and text.
    (tags.BINARY, _serialize_binary),
    (tags.UTF8, _utf_encoding_to_serializer('utf8')),
    (tags.UTF16, _utf_encoding_to_serializer('utf16')),
    (tags.UTF32, _utf_encoding_to_serializer('utf32')),
    # Containers.
    (tags.LIST, _string_serialize_list),
    (tags.DICTIONARY, _string_serialize_dictionary),
])
58
def serialize(o):
    """Serialize ``o`` to its textual form.

    ``o`` is first passed through ``tags._tag``; the tag of the result selects
    an entry of ``_STRING_SERIALIZERS``, which is applied to the tagged value.
    """
    tagged = tags._tag(o)
    render = _STRING_SERIALIZERS[tagged.tag]
    return render(tagged.value)
63
64 def _consume_leading_whitespace(wrapped_parser):
65     @functools.wraps(wrapped_parser)
66     def parser(s):
67         s = s.lstrip()
68         return wrapped_parser(s)
69
70     return parser
71
def _make_constant_parser(constant, value):
    """Build a parser that recognizes the literal text ``constant``.

    After leading whitespace is skipped, the parser succeeds with ``value``
    when the input starts with ``constant`` and reports the text after it as
    ``remaining``; otherwise it returns the shared failed result.
    """
    @_consume_leading_whitespace
    def constant_parser(s):
        if not s.startswith(constant):
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=value,
            remaining=s[len(constant):],
        )

    return constant_parser
86
def _make_integer_parser(width):
    """Build a parser for signed integer literals of the form ``<digits>i<width>``.

    After leading whitespace is skipped, the parser matches an optional minus
    sign, one or more digits, and the suffix ``i<width>``. The value must fit
    a signed integer of ``width`` bits (the same bounds the integer
    serializer asserts); a literal outside that range is reported as a failed
    parse rather than being accepted silently.

    Returns a function mapping a string to a parse result whose ``value`` is
    the parsed ``int`` and whose ``remaining`` is the text after the literal.
    """
    matcher = re.compile(r'(-?\d+)i' + str(width))
    minimum = -(2 ** (width - 1))
    maximum = 2 ** (width - 1) - 1

    @_consume_leading_whitespace
    def integer_parser(s):
        match = matcher.match(s)
        if not match:
            return _shared._FAILED_PARSE_RESULT

        value = int(match.group(1))
        if not minimum <= value <= maximum:
            # The literal claims a width it does not fit in (e.g. ``300i8``).
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=value,
            remaining=s[match.end():],
        )

    return integer_parser
105
# Textual form of a floating-point value, captured as group 1. Besides plain
# ``digits.digits`` it accepts the other shapes Python's default float
# formatting produces and the serializers therefore emit: a missing fraction,
# an exponent (``1e-05``, ``1.5e+20``), ``inf``/``-inf`` and ``nan``.
_FLOAT_LITERAL_PATTERN = r'(-?(?:\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|inf)|nan)'

_BINARY32_MATCHER = re.compile(_FLOAT_LITERAL_PATTERN + 'f')
_BINARY64_MATCHER = re.compile(_FLOAT_LITERAL_PATTERN + 'd')

@_consume_leading_whitespace
def _binary32_parser(s):
    """Parse a float literal with an ``f`` suffix from the start of ``s``.

    Leading whitespace is skipped by the decorator. On a match the result
    carries ``float(<literal>)`` as ``value`` and the text after the suffix as
    ``remaining``; otherwise the shared failed result is returned.
    """
    match = _BINARY32_MATCHER.match(s)
    if not match:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the float is in range
    return _shared.ParseResult(
        success=True,
        value=float(match.group(1)),
        remaining=s[match.end():],
    )
122
@_consume_leading_whitespace
def _binary64_parser(s):
    """Parse a double literal from the start of ``s``.

    Leading whitespace is skipped by the decorator. When
    ``_BINARY64_MATCHER`` matches, the result carries ``float`` of the first
    captured group as ``value`` and the text after the match as ``remaining``;
    otherwise the shared failed result is returned.
    """
    found = _BINARY64_MATCHER.match(s)
    if not found:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the double is in range
    literal = found.group(1)
    return _shared.ParseResult(
        success=True,
        value=float(literal),
        remaining=s[found.end():],
    )
136
_BINARY_MATCHER = re.compile(r'"([\da-f]*)"b')

@_consume_leading_whitespace
def _binary_parser(s):
    """Parse a quoted hex string with a ``b`` suffix from the start of ``s``.

    Leading whitespace is skipped, like the numeric and constant parsers do,
    so a binary literal may follow the ``, `` separator emitted when lists are
    serialized. On a match the result carries the decoded bytes as ``value``
    and the text after the suffix as ``remaining``; otherwise the shared
    failed result is returned.

    Raises ``binascii.Error`` if the quoted text has an odd number of hex
    digits, because it is handed to ``binascii.unhexlify`` unchanged.
    """
    match = _BINARY_MATCHER.match(s)
    if not match:
        return _shared._FAILED_PARSE_RESULT

    return _shared.ParseResult(
        success=True,
        value=binascii.unhexlify(match.group(1)),
        remaining=s[match.end():],
    )
150
def _make_utf_parser(encoding):
    """Build a parser for quoted text followed by the suffix ``encoding``.

    Leading whitespace is skipped, like the numeric and constant parsers do,
    so a string may follow the ``, `` and ``: `` separators emitted when
    containers are serialized. On a match the result carries the text between
    the quotes as ``value`` and the text after the suffix as ``remaining``;
    otherwise the shared failed result is returned.
    """
    # NOTE(review): ``.*?`` may run across a closing quote until it finds one
    # that is followed by ``encoding``, so a string with a different suffix
    # followed later by one with this suffix can be captured as a single
    # value. ``.`` also does not match newlines. Tightening this needs an
    # escaping scheme on the serializing side, so it is left unchanged here.
    matcher = re.compile(r'"(.*?)"' + encoding)

    @_consume_leading_whitespace
    def parser(s):
        match = matcher.match(s)
        if not match:
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=match.group(1),
            remaining=s[match.end():],
        )

    return parser
167
168 def _prefix_with_comma(parser):
169     def wrapped(s):
170         if s.startswith(','):
171             s = s[1:]
172
173             result = parser(s)
174             if not result.success:
175                 raise Exception('Trailing comma before "{}"'.format(s))
176
177             return result
178
179         return _shared._FAILED_PARSE_RESULT
180
181     return wrapped
182
def _comma_separate_and_wrap(wrapped_parser, start_wrap, end_wrap, typecaster):
    """Build a parser for a delimited, comma-separated sequence of items.

    Parameters:
        wrapped_parser: parser applied to each item.
        start_wrap: text that opens the sequence (e.g. ``[``).
        end_wrap: text that closes the sequence (e.g. ``]``).
        typecaster: callable applied to the list of parsed item values to
            produce the final value.

    The returned parser tolerates whitespace before the opening delimiter,
    before each separating comma and before the closing delimiter, so that the
    padded text produced when containers are serialized (``{ key: value }``,
    nested containers after ``, ``) can be read back. It returns the shared
    failed result when either delimiter is missing; a comma that is not
    followed by an item raises from the comma-prefixed parser.
    """
    parser_prefixed_with_comma = _prefix_with_comma(wrapped_parser)

    def parser(s):
        s = s.lstrip()
        if not s.startswith(start_wrap):
            return _shared._FAILED_PARSE_RESULT
        s = s[len(start_wrap):]

        items = []
        parse_result = wrapped_parser(s)

        while parse_result.success:
            items.append(parse_result.value)
            # Whitespace may separate an item from the comma or closing
            # delimiter that follows it.
            s = parse_result.remaining.lstrip()
            parse_result = parser_prefixed_with_comma(s)

        # Covers an empty sequence whose delimiters are separated by padding.
        s = s.lstrip()
        if not s.startswith(end_wrap):
            return _shared._FAILED_PARSE_RESULT

        return _shared.ParseResult(
            success=True,
            value=typecaster(items),
            remaining=s[len(end_wrap):],
        )

    return parser
214
# _PARSERS is only defined further down; this function is declared here anyway
# so that the definition of _list_parser can refer to it.
def _object_parser(source):
    """Try each parser in ``_PARSERS`` in order and return the first success.

    Returns the shared failed result when no parser succeeds on ``source``.
    """
    for candidate in _PARSERS:
        outcome = candidate(source)
        if outcome.success:
            return outcome

    return _shared._FAILED_PARSE_RESULT
225
# Lists: objects separated by commas between square brackets, collected into a list.
_list_parser = _comma_separate_and_wrap(
    _object_parser,
    start_wrap='[',
    end_wrap=']',
    typecaster=list,
)
227
def _kvp_parser(s):
    """Parse one ``key: value`` pair from the start of ``s``.

    Key and value are each parsed with ``_object_parser``. Whitespace between
    the key and the colon is skipped, mirroring the whitespace that parsers
    wrapped in ``_consume_leading_whitespace`` already skip before a value.

    Returns a parse result whose ``value`` is the ``(key, value)`` tuple and
    whose ``remaining`` is the text after the value, or the shared failed
    result when the key, the colon or the value is missing.
    """
    key_parse_result = _object_parser(s)
    if not key_parse_result.success:
        return _shared._FAILED_PARSE_RESULT

    after_key = key_parse_result.remaining.lstrip()
    if not after_key.startswith(':'):
        return _shared._FAILED_PARSE_RESULT

    value_parse_result = _object_parser(after_key[1:])
    if not value_parse_result.success:
        return _shared._FAILED_PARSE_RESULT

    return _shared.ParseResult(
        success=True,
        value=(key_parse_result.value, value_parse_result.value),
        remaining=value_parse_result.remaining,
    )
251
# Dictionaries: key/value pairs separated by commas between braces, collected
# into an OrderedDict in the order they were parsed.
_dictionary_parser = _comma_separate_and_wrap(
    _kvp_parser,
    start_wrap='{',
    end_wrap='}',
    typecaster=collections.OrderedDict,
)
253
254
# Parsers tried by _object_parser, in this order; the first one that succeeds
# determines the result.
_PARSERS = (
    [
        _make_constant_parser(text, constant_value)
        for text, constant_value in (('null', None), ('true', True), ('false', False))
    ]
    + [_make_integer_parser(width) for width in (8, 16, 32, 64)]
    + [_binary32_parser, _binary64_parser, _binary_parser]
    + [_make_utf_parser(encoding) for encoding in ('utf8', 'utf16', 'utf32')]
    + [_list_parser, _dictionary_parser]
)
272
273 def _parse(parser, source):
274     result = parser(source)
275
276     if result.success:
277         if result.remaining.strip() == '':
278             return result.value
279
280         raise Exception('Unparsed trailing characters: "{}"'.format(result.remaining))
281
282     raise Exception('Unable to parse: "{}"'.format(source))
283
def deserialize(s):
    """Parse the text ``s`` into a value.

    Delegates to ``_parse`` with ``_object_parser``, so an ``Exception`` is
    raised when ``s`` cannot be parsed or has unparsed trailing characters.
    """
    return _parse(parser=_object_parser, source=s)