Serialization/deserialization for lists
[sandbox] / serial / serial / text.py
1 import binascii
2 import re
3
4 from . import tags
5
6 def _make_literal_serializer(expected_value, literal):
7     def serializer(to):
8         assert to.instance is expected_value
9         return literal
10
11     return serializer
12
13 def _make_integer_serializer(lower_bound, upper_bound, suffix):
14     def _serializer(to):
15         assert lower_bound <= to.instance and to.instance < upper_bound
16         return '{}{}'.format(to.instance, suffix)
17
18     return _serializer
19
def _make_unsigned_integer_serializer(bit_length):
    """Build a serializer for unsigned integers of ``bit_length`` bits.

    Accepted values lie in ``[0, 2 ** bit_length)`` and are written with a
    ``u<bit_length>`` suffix.
    """
    # 2 << (n - 1) is 2 ** n, the exclusive upper bound.
    exclusive_max = 2 << (bit_length - 1)
    suffix = 'u{}'.format(bit_length)
    return _make_integer_serializer(0, exclusive_max, suffix)
22
def _make_signed_integer_serializer(bit_length):
    """Build a serializer for two's-complement integers of ``bit_length`` bits.

    Accepted values lie in ``[-2 ** (bit_length - 1), 2 ** (bit_length - 1))``
    and are written with an ``i<bit_length>`` suffix.
    """
    # 2 << (n - 2) is 2 ** (n - 1): half of the unsigned range.
    magnitude = 2 << (bit_length - 2)
    suffix = 'i{}'.format(bit_length)
    return _make_integer_serializer(-magnitude, magnitude, suffix)
27
28 def _serialize_binary(to):
29     return 'bin"{}"'.format(binascii.hexlify(to.instance).decode('ascii'))
30
31 _ESCAPES = {
32     '\\': '\\\\',
33     '"': '\\"',
34 }
35
36 def _escape_character(ch):
37     return _ESCAPES.get(ch, ch)
38
39 def _escape(s):
40     return ''.join(_escape_character(ch) for ch in s)
41
def _make_string_serializer(prefix):
    """Build a serializer that writes a ``str`` instance as ``<prefix>"..."``.

    The returned callable asserts that the tagged object's ``instance`` is
    a ``str`` and escapes it with ``_escape`` before quoting it.
    """
    def _serialize_string(to):
        text = to.instance
        assert isinstance(text, str)
        return '{}"{}"'.format(prefix, _escape(text))

    return _serialize_string
48
49 def _indent(s, depth = 2):
50     return '\n'.join(' ' * depth + line for line in s.split('\n'))
51
def _serialize_list(to):
    """Render a tagged list as a bracketed block, one element per line.

    Each element is serialized with ``serialize``; the elements are joined
    with ``',\\n'``, indented via ``_indent`` and wrapped in ``'[\\n'`` and
    ``'\\n]'``.
    """
    items = to.instance
    assert isinstance(items, list)

    rendered = [serialize(item) for item in items]
    body = _indent(',\n'.join(rendered))

    return '[\n' + body + '\n]'
56
# Dispatch table: tag -> callable that turns a tagged object into text.
_SERIALIZERS = {
    tags.NULL: _make_literal_serializer(None, 'null'),
    tags.TRUE: _make_literal_serializer(True, 'true'),
    tags.FALSE: _make_literal_serializer(False, 'false'),
}

# Fixed-width integers, unsigned first, then signed.
_SERIALIZERS.update(
    (tag, _make_unsigned_integer_serializer(bits))
    for tag, bits in (
        (tags.UINT8, 8),
        (tags.UINT16, 16),
        (tags.UINT32, 32),
        (tags.UINT64, 64),
    )
)
_SERIALIZERS.update(
    (tag, _make_signed_integer_serializer(bits))
    for tag, bits in (
        (tags.INT8, 8),
        (tags.INT16, 16),
        (tags.INT32, 32),
        (tags.INT64, 64),
    )
)

_SERIALIZERS[tags.BINARY] = _serialize_binary

# Strings share one serializer shape and differ only in their prefix.
_SERIALIZERS.update(
    (tag, _make_string_serializer(prefix))
    for tag, prefix in (
        (tags.UTF8, 'utf8'),
        (tags.UTF16, 'utf16'),
        (tags.UTF32, 'utf32'),
    )
)

_SERIALIZERS[tags.LIST] = _serialize_list
75
def serialize(to):
    """Return the text form of the tagged object ``to``.

    The serializer is looked up in ``_SERIALIZERS`` by ``to.tag``; a tag
    that is not in the table raises ``KeyError``.
    """
    handler = _SERIALIZERS[to.tag]
    return handler(to)
78
79 def _make_literal_deserializer(tag, instance, literal):
80     def _deserializer(s):
81         if s.startswith(literal):
82             return True, tags.TaggedObject(tag = tag, instance = instance), s[len(literal):]
83
84         return False, None, None
85
86     return _deserializer
87
88 def _make_regex_deserializer(tag, decoder, regex):
89     matcher = re.compile(regex).match
90
91     def _deserializer(s):
92         match = matcher(s)
93
94         if match is None:
95             return False, None, None
96
97         return True, tags.TaggedObject(tag = tag, instance = decoder(match)), s[match.end():]
98
99     return _deserializer
100
def _make_unsigned_int_deserializer(tag, bit_length):
    """Build a deserializer for ``<digits>u<bit_length>`` literals.

    The decoded value is asserted to be below ``2 ** bit_length``.
    """
    # 2 << (n - 1) is 2 ** n, the exclusive upper bound.
    limit = 2 << (bit_length - 1)

    def _decode(match):
        value = int(match.group(1))
        assert value < limit
        return value

    pattern = r'(\d+)' + 'u{}'.format(bit_length)
    return _make_regex_deserializer(tag, _decode, pattern)
110
def _make_signed_int_deserializer(tag, bit_length):
    """Build a deserializer for ``<optional minus><digits>i<bit_length>`` literals.

    The decoded value is asserted to lie in
    ``[-2 ** (bit_length - 1), 2 ** (bit_length - 1))``.
    """
    # 2 << (n - 2) is 2 ** (n - 1): half of the unsigned range.
    magnitude = 2 << (bit_length - 2)

    def _decode(match):
        value = int(match.group(1))
        assert -magnitude <= value < magnitude
        return value

    pattern = r'(-?\d+)' + 'i{}'.format(bit_length)
    return _make_regex_deserializer(tag, _decode, pattern)
121
122 _BINARY_MATCHER = re.compile(r'bin"([\da-f]*)"').match
123
124 def _deserialize_binary(s):
125     match = _BINARY_MATCHER(s)
126
127     if match is None:
128         return False, None, None
129
130     result = tags.TaggedObject(
131         tag = tags.BINARY,
132         instance = binascii.unhexlify(match.group(1)),
133     )
134
135     return True, result, s[match.end():]
136
137 def _make_string_matcher(prefix):
138     return re.compile(prefix + r'"(([^"]|\\.)*)"').match
139
140 _UNESCAPE_CHARACTERS = {
141     '\\': '\\',
142     '"': '"',
143 }
144
145 def _unescape_character(ch):
146     return _UNESCAPE_CHARACTERS[ch]
147
148 def _unescape(s):
149     characters = []
150     escaping = False
151
152     for ch in s:
153         if escaping:
154             characters.append(_unescape_character(ch))
155             escaping = False
156
157         elif ch == '\\':
158             escaping = True
159
160         else:
161             characters.append(ch)
162
163     return ''.join(characters)
164
def _make_string_deserializer(tag, prefix):
    """Build a deserializer for ``<prefix>"..."`` string literals.

    The returned callable yields ``(True, tagged_object, remaining_text)``
    with the unescaped string as the instance, or ``(False, None, None)``
    when the input does not start with such a literal.
    """
    match_literal = _make_string_matcher(prefix)

    def _deserialize_string(s):
        found = match_literal(s)

        if found is None:
            return False, None, None

        decoded = _unescape(found.group(1))
        tagged = tags.TaggedObject(tag = tag, instance = decoded)

        return True, tagged, s[found.end():]

    return _deserialize_string
182
def _deserialize_list(s):
    """Parse a bracketed, comma-separated list from the start of ``s``.

    Leading whitespace, whitespace around elements and around commas is
    skipped.  An empty list (``[]``, possibly with whitespace inside) and a
    single trailing comma before the closing bracket are both accepted.

    Returns ``(True, tagged_object, remaining_text)`` with a ``tags.LIST``
    object whose instance is the list of parsed elements, or
    ``(False, None, None)`` when ``s`` does not start with ``'['``.  Once
    the opening bracket has been seen, malformed content fails an
    assertion, like the other parsers in this module.
    """
    s = s.lstrip()

    if not s.startswith('['):
        return False, None, None

    instance = []

    s = s[1:].lstrip()

    # Checking for ']' before parsing an element covers the empty list;
    # checking it again after each comma covers a trailing comma.
    while not s.startswith(']'):
        succeeded, result, remaining = _deserialize_one(s)
        assert succeeded, 'expected a list element'

        instance.append(result)
        s = remaining.lstrip()

        if s.startswith(']'):
            break

        assert s.startswith(','), "expected ',' or ']' after a list element"
        s = s[1:].lstrip()

    return True, tags.TaggedObject(tag = tags.LIST, instance = instance), s[1:]
215
# Deserializers in the order in which they are tried: literals, unsigned
# integers, signed integers, binary, strings, lists.
_DESERIALIZERS = [
    _make_literal_deserializer(tags.NULL, None, 'null'),
    _make_literal_deserializer(tags.TRUE, True, 'true'),
    _make_literal_deserializer(tags.FALSE, False, 'false'),
]

_DESERIALIZERS.extend(
    _make_unsigned_int_deserializer(tag, bits)
    for tag, bits in (
        (tags.UINT8, 8),
        (tags.UINT16, 16),
        (tags.UINT32, 32),
        (tags.UINT64, 64),
    )
)
_DESERIALIZERS.extend(
    _make_signed_int_deserializer(tag, bits)
    for tag, bits in (
        (tags.INT8, 8),
        (tags.INT16, 16),
        (tags.INT32, 32),
        (tags.INT64, 64),
    )
)

_DESERIALIZERS.append(_deserialize_binary)

_DESERIALIZERS.extend(
    _make_string_deserializer(tag, prefix)
    for tag, prefix in (
        (tags.UTF8, 'utf8'),
        (tags.UTF16, 'utf16'),
        (tags.UTF32, 'utf32'),
    )
)

_DESERIALIZERS.append(_deserialize_list)
234
def _deserialize_one(s):
    """Parse a single value from the start of ``s``.

    Tries each entry of ``_DESERIALIZERS`` in order and returns the
    ``(succeeded, tagged_object, remaining_text)`` triple of the first one
    that succeeds, or ``(False, None, None)`` if none does.
    """
    for attempt in _DESERIALIZERS:
        matched, tagged, rest = attempt(s)

        if not matched:
            continue

        return matched, tagged, rest

    return False, None, None
243
def deserialize(s):
    """Parse ``s`` as exactly one serialized value and return its tagged object.

    Asserts that a value was recognised and that nothing is left over
    after it.
    """
    parsed_ok, tagged, leftover = _deserialize_one(s)

    assert parsed_ok
    assert leftover == ''

    return tagged
249
250     return result