Added string and list parsing
[ton] / don / string.py
1 import binascii
2 import re
3
4 from don import tags, _shared
5
6 def _integer_size_to_string_serializer(integer_size):
7     minimum = -(2 ** (integer_size - 1))
8     maximum = 2 ** (integer_size - 1) - 1
9     
10     def serializer(integer):
11         assert minimum <= integer and integer <= maximum
12         return '{}i{}'.format(integer, integer_size)
13
14     return serializer
15
16 def _serialize_float(f):
17     return '{}f'.format(f)
18
19 def _serialize_double(d):
20     return '{}d'.format(d)
21
22 def _serialize_binary(b):
23     return '"{}"b'.format(binascii.hexlify(b).decode('ascii'))
24
25 def _utf_encoding_to_serializer(utf_encoding):
26     def serializer(s):
27         return '"{}"{}'.format(s, utf_encoding)
28
29     return serializer
30
def _string_serialize_list(l):
    """Render each element of ``l`` with ``serialize`` inside square brackets.

    Elements are separated by a comma followed by a single space.
    """
    rendered_items = [serialize(item) for item in l]
    return '[{}]'.format(', '.join(rendered_items))
33
34 def _string_serialize_dictionary(d):
35     def serialize_kvp(kvp):
36         return serialize(kvp[0]) + ': ' + serialize(kvp[1])
37     return '{ ' + ', '.join(map(serialize_kvp, d.items())) + ' }'
38
# Dispatch table: maps each tag to the callable that renders a value carrying
# that tag as text.
_STRING_SERIALIZERS = {
    # Constants ignore the value they are given.
    tags.VOID: lambda _value: 'null',
    tags.TRUE: lambda _value: 'true',
    tags.FALSE: lambda _value: 'false',

    # Signed integers, one serializer per bit width.
    tags.INT8: _integer_size_to_string_serializer(8),
    tags.INT16: _integer_size_to_string_serializer(16),
    tags.INT32: _integer_size_to_string_serializer(32),
    tags.INT64: _integer_size_to_string_serializer(64),

    # Floating point numbers.
    tags.FLOAT: _serialize_float,
    tags.DOUBLE: _serialize_double,

    # Raw bytes and text.
    tags.BINARY: _serialize_binary,
    tags.UTF8: _utf_encoding_to_serializer('utf8'),
    tags.UTF16: _utf_encoding_to_serializer('utf16'),
    tags.UTF32: _utf_encoding_to_serializer('utf32'),

    # Containers.
    tags.LIST: _string_serialize_list,
    tags.DICTIONARY: _string_serialize_dictionary,
}
56
def serialize(o):
    """Return the textual form of ``o``.

    ``o`` is first passed through ``tags._tag``; the resulting tag selects
    the serializer, which is then applied to the tagged value.
    """
    tagged = tags._tag(o)
    serializer = _STRING_SERIALIZERS[tagged.tag]
    return serializer(tagged.value)
61
def _make_constant_parser(constant, value):
    """Build a parser that recognises the literal text ``constant``.

    The returned parser succeeds with ``value`` when its input starts with
    ``constant`` and reports everything after the literal as remaining
    input; otherwise it returns the shared failed result.
    """
    def parser(s):
        if not s.startswith(constant):
            return _shared._FAILED_PARSE_RESULT

        rest = s[len(constant):]
        return _shared.ParseResult(
            success = True,
            value = value,
            remaining = rest,
        )

    return parser
75
76 def _make_integer_parser(width):
77     matcher = re.compile(r'(-?\d+)i' + str(width))
78
79     def parser(s):
80         match = matcher.match(s)
81
82         if match:
83             # TODO Validate that the integer is in range
84             return _shared.ParseResult(
85                 success = True,
86                 value = int(match.group(1)),
87                 remaining = s[match.end():],
88             )
89
90         return _shared._FAILED_PARSE_RESULT
91
92     return parser
93
94 _BINARY32_MATCHER = re.compile(r'(-?\d+\.\d+)f')
95 _BINARY64_MATCHER = re.compile(r'(-?\d+\.\d+)d')
96
def _binary32_parser(s):
    """Parse a leading single precision literal from ``s``.

    Uses ``_BINARY32_MATCHER`` to recognise the literal and converts the
    captured number with ``float``. Returns the shared failed result when
    ``s`` does not start with such a literal.
    """
    match = _BINARY32_MATCHER.match(s)

    if match is None:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the float is in range
    number = float(match.group(1))
    rest = s[match.end():]
    return _shared.ParseResult(success = True, value = number, remaining = rest)
109
def _binary64_parser(s):
    """Parse a leading double precision literal from ``s``.

    Uses ``_BINARY64_MATCHER`` to recognise the literal and converts the
    captured number with ``float``. Returns the shared failed result when
    ``s`` does not start with such a literal.
    """
    match = _BINARY64_MATCHER.match(s)

    if match is None:
        return _shared._FAILED_PARSE_RESULT

    # TODO Validate that the double is in range
    number = float(match.group(1))
    rest = s[match.end():]
    return _shared.ParseResult(success = True, value = number, remaining = rest)
122
123 _BINARY_MATCHER = re.compile(r'"([\da-f]*)"b')
124
def _binary_parser(s):
    """Parse a leading binary literal from ``s`` into bytes.

    Uses ``_BINARY_MATCHER`` to recognise the literal and decodes the
    captured hex text with ``binascii.unhexlify``. Returns the shared
    failed result when ``s`` does not start with such a literal.
    """
    match = _BINARY_MATCHER.match(s)

    if match is None:
        return _shared._FAILED_PARSE_RESULT

    decoded = binascii.unhexlify(match.group(1))
    rest = s[match.end():]
    return _shared.ParseResult(success = True, value = decoded, remaining = rest)
136
def _make_utf_parser(encoding):
    """Build a parser for quoted string literals suffixed with ``encoding``.

    On success the text between the quotes is returned unchanged as the
    parsed value; otherwise the shared failed result is returned.
    """
    # NOTE: the lazy ``.*?`` ends at the first closing quote that is
    # immediately followed by ``encoding``, so the captured text can itself
    # contain quote characters.
    matcher = re.compile(r'"(.*?)"' + encoding)

    def parser(s):
        match = matcher.match(s)

        if match is None:
            return _shared._FAILED_PARSE_RESULT

        text = match.group(1)
        rest = s[match.end():]
        return _shared.ParseResult(success = True, value = text, remaining = rest)

    return parser
153
154 def _prefix_with_comma(parser):
155     def wrapped(s):
156         if s.startswith(','):
157             s = s[1:]
158
159             result = parser(s)
160             if not result.success:
161                 raise Exception('Trailing comma before "{}"'.format(s))
162
163             return result
164
165         return _shared._FAILED_PARSE_RESULT
166
167     return wrapped
168
def _list_parser(s):
    """Parse a bracketed, comma separated list of objects from the start of ``s``.

    Returns the shared failed result when ``s`` does not start with ``[`` or
    when the elements are not followed by ``]``. On success the parsed value
    is a Python list of the element values and the remaining input is
    whatever follows the closing bracket.
    """
    # TODO Assert they are all the same type
    if not s.startswith('['):
        return _shared._FAILED_PARSE_RESULT
    s = s[1:]

    value = []

    # Every element after the first must be introduced by a comma; build
    # that parser once instead of on every loop iteration.
    next_element_parser = _prefix_with_comma(_object_parser)

    parse_result = _object_parser(s)

    while parse_result.success:
        value.append(parse_result.value)
        s = parse_result.remaining
        parse_result = next_element_parser(s)

    if not s.startswith(']'):
        return _shared._FAILED_PARSE_RESULT

    return _shared.ParseResult(
        success = True,
        value = value,
        remaining = s[1:],
    )
193
194
195
def _dictionary_parser(s):
    """Placeholder dictionary parser: always reports failure.

    The input ``s`` is not inspected.
    """
    # TODO Implement dictionary parsing
    failure = _shared._FAILED_PARSE_RESULT
    return failure
198
199
# Parsers tried, in this order, by _object_parser: constants, integers by
# increasing width, floating point numbers, binary, text, then containers.
_PARSERS = (
    [
        _make_constant_parser('null', None),
        _make_constant_parser('true', True),
        _make_constant_parser('false', False),
    ]
    + [_make_integer_parser(width) for width in (8, 16, 32, 64)]
    + [
        _binary32_parser,
        _binary64_parser,
        _binary_parser,
    ]
    + [_make_utf_parser(encoding) for encoding in ('utf8', 'utf16', 'utf32')]
    + [
        _list_parser,
        _dictionary_parser,
    ]
)
217
def _object_parser(source):
    """Parse one object from the start of ``source``.

    Each parser in ``_PARSERS`` is tried in order and the first successful
    result is returned; if none succeeds the shared failed result is
    returned.
    """
    # The generator is lazy, so parsers after the first success never run.
    attempts = (parser(source) for parser in _PARSERS)
    successes = (attempt for attempt in attempts if attempt.success)
    return next(successes, _shared._FAILED_PARSE_RESULT)
226
227 def _parse(parser, source):
228     result = parser(source)
229
230     if result.success:
231         if result.remaining.strip() == '':
232             return result.value
233
234         raise Exception('Unparsed trailing characters: "{}"'.format(result.remaining))
235
236     raise Exception('Unable to parse: "{}"'.format(source))
237
def deserialize(s):
    """Parse the text ``s`` into a single object and return it.

    Delegates to ``_parse`` with ``_object_parser`` as the top-level parser.
    """
    top_level_parser = _object_parser
    return _parse(top_level_parser, s)