Skip to content

Commit a2e53ab

Browse files
elpransElvis Pranskevichus
authored and
Elvis Pranskevichus
committed
Add support for the text I/O fallback for arrays of unknown types.
Currently, asyncpg supports falling back to text I/O for types that do not have a registered codec. This commit extends this behaviour to arrays of such types. Additionally, it adds an explicit error that is raised when text I/O is attempted for a range or a composite type, as these are not supported yet. Fixes: #82.
1 parent b329d02 commit a2e53ab

File tree

8 files changed

+323
-43
lines changed

8 files changed

+323
-43
lines changed

asyncpg/protocol/buffer.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ cdef class WriteBuffer:
4848
cdef write_bytes(self, bytes data)
4949
cdef write_bytestring(self, bytes string)
5050
cdef write_str(self, str string, str encoding)
51-
cdef write_cstr(self, char *data, ssize_t len)
51+
cdef write_cstr(self, const char *data, ssize_t len)
5252
cdef write_int16(self, int16_t i)
5353
cdef write_int32(self, int32_t i)
5454
cdef write_int64(self, int64_t i)

asyncpg/protocol/buffer.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ cdef class WriteBuffer:
169169
cdef write_str(self, str string, str encoding):
170170
self.write_bytestring(string.encode(encoding))
171171

172-
cdef write_cstr(self, char *data, ssize_t len):
172+
cdef write_cstr(self, const char *data, ssize_t len):
173173
self._check_readonly()
174174
self._ensure_alloced(len)
175175

asyncpg/protocol/codecs/array.pyx

+121
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,127 @@ cdef inline array_encode(ConnectionSettings settings, WriteBuffer buf,
140140
buf.write_buffer(elem_data)
141141

142142

143+
cdef _write_textarray_data(ConnectionSettings settings, object obj,
                           int32_t ndims, int32_t dim, WriteBuffer array_data,
                           encode_func_ex encoder, const void *encoder_arg,
                           Py_UCS4 typdelim):
    """Append the text representation of one array dimension to *array_data*.

    The output is ``{`` + items + ``}``, where items are separated by the
    type delimiter followed by a single space.  For every dimension except
    the innermost one (``dim < ndims - 1``) each item of *obj* is itself
    written by a recursive call.  In the innermost dimension each item is
    encoded with *encoder* and emitted as follows:

    * ``None`` is written as an unquoted ``NULL``;
    * an empty encoded value is written as ``""``;
    * a value that compares equal to ``NULL`` via ``apg_strcasecmp_char``
      is written inside double quotes, keeping its original bytes;
    * a value containing a double quote, a backslash, a brace, the
      delimiter or a character accepted by ``apg_ascii_isspace`` is
      written inside double quotes, with double quotes and backslashes
      backslash-escaped;
    * any other value is written verbatim.

    :param settings: connection settings, passed through to *encoder*.
    :param obj: iterable holding the items of the current dimension.
    :param ndims: total number of array dimensions.
    :param dim: zero-based index of the dimension being written.
    :param array_data: buffer receiving the text output.
    :param encoder: element encoder; its output is expected to start with
        a four-byte length prefix, which is skipped here.
    :param encoder_arg: opaque argument forwarded to *encoder*.
    :param typdelim: element delimiter character of the array type.

    :raises ValueError: if *encoder* raises ``TypeError`` for an item.
    """
    cdef:
        # Number of items already written at this level; only used to
        # decide whether a delimiter must precede the next item.
        ssize_t i = 0
        # Byte index into the encoded element.  Kept separate from ``i``
        # so that scanning an element never disturbs the item counter.
        ssize_t j
        int8_t delim = <int8_t>typdelim
        WriteBuffer elem_data
        Py_buffer pybuf
        const char *elem_str
        char ch
        ssize_t elem_len
        ssize_t quoted_elem_len
        bint need_quoting

    array_data.write_byte(b'{')

    if dim < ndims - 1:
        # Not the innermost dimension yet: every item is a nested array.
        for item in obj:
            if i > 0:
                array_data.write_byte(delim)
                array_data.write_byte(b' ')
            i += 1

            _write_textarray_data(settings, item, ndims, dim + 1, array_data,
                                  encoder, encoder_arg, typdelim)
    else:
        for item in obj:
            if i > 0:
                array_data.write_byte(delim)
                array_data.write_byte(b' ')
            i += 1

            if item is None:
                # No scratch buffer is needed for NULL elements.
                array_data.write_bytes(b'NULL')
                continue

            elem_data = WriteBuffer.new()
            try:
                encoder(settings, elem_data, item, encoder_arg)
            except TypeError as e:
                raise ValueError(
                    'invalid array element: {}'.format(
                        e.args[0])) from None

            # element string length (first four bytes are the encoded length.)
            elem_len = elem_data.len() - 4

            if elem_len == 0:
                # Empty string
                array_data.write_bytes(b'""')
                continue

            cpython.PyObject_GetBuffer(
                elem_data, &pybuf, cpython.PyBUF_SIMPLE)

            elem_str = <const char*>(pybuf.buf) + 4

            try:
                # NOTE(review): apg_strcasecmp_char presumably compares
                # NUL-terminated strings case-insensitively and returns 0
                # on a match -- confirm that elem_str is terminated.
                if not apg_strcasecmp_char(elem_str, b'NULL'):
                    # Quote the element so that it is not taken for a NULL
                    # marker, but emit its own bytes rather than a fixed
                    # literal so that the original value is preserved.
                    array_data.write_byte(b'"')
                    array_data.write_cstr(elem_str, elem_len)
                    array_data.write_byte(b'"')
                else:
                    quoted_elem_len = elem_len
                    need_quoting = False

                    # First pass: decide whether quoting and/or escaping
                    # is required.
                    for j in range(elem_len):
                        ch = elem_str[j]
                        if ch == b'"' or ch == b'\\':
                            # Quotes and backslashes need escaping.
                            quoted_elem_len += 1
                            need_quoting = True
                        elif (ch == b'{' or ch == b'}' or ch == delim or
                                apg_ascii_isspace(<uint32_t>ch)):
                            need_quoting = True

                    if need_quoting:
                        array_data.write_byte(b'"')

                        if quoted_elem_len == elem_len:
                            # Nothing to escape: copy the bytes in one go.
                            array_data.write_cstr(elem_str, elem_len)
                        else:
                            # Escaping required.
                            for j in range(elem_len):
                                ch = elem_str[j]
                                if ch == b'"' or ch == b'\\':
                                    array_data.write_byte(b'\\')
                                array_data.write_byte(ch)

                        array_data.write_byte(b'"')
                    else:
                        array_data.write_cstr(elem_str, elem_len)
            finally:
                # Always give the buffer view back, even if a write fails.
                cpython.PyBuffer_Release(&pybuf)

    array_data.write_byte(b'}')
239+
240+
241+
cdef inline textarray_encode(ConnectionSettings settings, WriteBuffer buf,
                             object obj, encode_func_ex encoder,
                             const void *encoder_arg, Py_UCS4 typdelim):
    """Encode *obj* as a text-format array and append it to *buf*.

    The array text is first rendered into a scratch buffer by
    ``_write_textarray_data``; *buf* then receives the length of that
    text as an int32, followed by the text itself.

    :param settings: connection settings, passed through to *encoder*.
    :param buf: destination buffer.
    :param obj: the array value; must satisfy ``_is_container``.
    :param encoder: element encoder used for the innermost items.
    :param encoder_arg: opaque argument forwarded to *encoder*.
    :param typdelim: element delimiter character of the array type.

    :raises TypeError: if *obj* is rejected by ``_is_container``.
    """
    cdef:
        WriteBuffer array_data
        int32_t dims[ARRAY_MAXDIM]
        int32_t ndims = 1

    if not _is_container(obj):
        raise TypeError(
            'a non-trivial iterable expected (got type {!r})'.format(
                type(obj).__name__))

    # Only ``ndims`` is consumed below; ``dims`` is presumably filled in
    # by _get_array_shape as part of its shape validation.
    _get_array_shape(obj, dims, &ndims)

    array_data = WriteBuffer.new()
    _write_textarray_data(settings, obj, ndims, 0, array_data,
                          encoder, encoder_arg, typdelim)
    buf.write_int32(array_data.len())
    buf.write_buffer(array_data)
262+
263+
143264
cdef inline array_decode(ConnectionSettings settings, FastReadBuffer buf,
144265
decode_func_ex decoder, const void *decoder_arg):
145266
cdef:

asyncpg/protocol/codecs/base.pxd

+4
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ cdef class Codec:
8181
cdef encode_array(self, ConnectionSettings settings, WriteBuffer buf,
8282
object obj)
8383

84+
cdef encode_array_text(self, ConnectionSettings settings, WriteBuffer buf,
85+
object obj)
86+
8487
cdef encode_range(self, ConnectionSettings settings, WriteBuffer buf,
8588
object obj)
8689

@@ -137,6 +140,7 @@ cdef class Codec:
137140
cdef Codec new_composite_codec(uint32_t oid,
138141
str name,
139142
str schema,
143+
CodecFormat format,
140144
list element_codecs,
141145
tuple element_type_oids,
142146
object element_names)

asyncpg/protocol/codecs/base.pyx

+76-36
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,24 @@ cdef class Codec:
5050
self.encoder = <codec_encode_func>&self.encode_scalar
5151
self.decoder = <codec_decode_func>&self.decode_scalar
5252
elif type == CODEC_ARRAY:
53-
self.encoder = <codec_encode_func>&self.encode_array
5453
if format == PG_FORMAT_BINARY:
54+
self.encoder = <codec_encode_func>&self.encode_array
5555
self.decoder = <codec_decode_func>&self.decode_array
5656
else:
57+
self.encoder = <codec_encode_func>&self.encode_array_text
5758
self.decoder = <codec_decode_func>&self.decode_array_text
5859
elif type == CODEC_RANGE:
60+
if format != PG_FORMAT_BINARY:
61+
raise RuntimeError(
62+
'cannot encode type "{}"."{}": text encoding of '
63+
'range types is not supported'.format(schema, name))
5964
self.encoder = <codec_encode_func>&self.encode_range
6065
self.decoder = <codec_decode_func>&self.decode_range
6166
elif type == CODEC_COMPOSITE:
67+
if format != PG_FORMAT_BINARY:
68+
raise RuntimeError(
69+
'cannot encode type "{}"."{}": text encoding of '
70+
'composite types is not supported'.format(schema, name))
6271
self.encoder = <codec_encode_func>&self.encode_composite
6372
self.decoder = <codec_decode_func>&self.decode_composite
6473
elif type == CODEC_PY:
@@ -91,6 +100,13 @@ cdef class Codec:
91100
codec_encode_func_ex,
92101
<void*>(<cpython.PyObject>self.element_codec))
93102

103+
cdef encode_array_text(self, ConnectionSettings settings, WriteBuffer buf,
                       object obj):
    # Encode *obj* as a text-format array: delegate to textarray_encode,
    # handing it this codec's element codec (as the opaque encoder
    # argument) and this codec's element delimiter.
    cdef void *elem_codec_arg

    elem_codec_arg = <void*>(<cpython.PyObject>self.element_codec)

    return textarray_encode(settings, buf, obj, codec_encode_func_ex,
                            elem_codec_arg, self.element_delimiter)
109+
94110
cdef encode_range(self, ConnectionSettings settings, WriteBuffer buf,
95111
object obj):
96112
range_encode(settings, buf, obj, self.element_codec.oid,
@@ -269,22 +285,22 @@ cdef class Codec:
269285
Codec element_codec):
270286
cdef Codec codec
271287
codec = Codec(oid)
272-
codec.init(name, schema, 'range', CODEC_RANGE, PG_FORMAT_BINARY,
273-
NULL, NULL, None, None, element_codec, None, None, None,
274-
0)
288+
codec.init(name, schema, 'range', CODEC_RANGE, element_codec.format,
289+
NULL, NULL, None, None, element_codec, None, None, None, 0)
275290
return codec
276291

277292
@staticmethod
278293
cdef Codec new_composite_codec(uint32_t oid,
279294
str name,
280295
str schema,
296+
CodecFormat format,
281297
list element_codecs,
282298
tuple element_type_oids,
283299
object element_names):
284300
cdef Codec codec
285301
codec = Codec(oid)
286302
codec.init(name, schema, 'composite', CODEC_COMPOSITE,
287-
PG_FORMAT_BINARY, NULL, NULL, None, None, None,
303+
format, NULL, NULL, None, None, None,
288304
element_type_oids, element_names, element_codecs, 0)
289305
return codec
290306

@@ -368,11 +384,12 @@ cdef class DataCodecConfig:
368384
elem_format = PG_FORMAT_BINARY
369385
else:
370386
elem_format = PG_FORMAT_TEXT
387+
371388
elem_codec = self.get_codec(array_element_oid, elem_format)
372389
if elem_codec is None:
373-
raise RuntimeError(
374-
'no codec for array element type {}'.format(
375-
array_element_oid))
390+
elem_format = PG_FORMAT_TEXT
391+
elem_codec = self.declare_fallback_codec(
392+
array_element_oid, name, schema)
376393

377394
elem_delim = <Py_UCS4>ti['elemdelim'][0]
378395

@@ -410,9 +427,8 @@ cdef class DataCodecConfig:
410427

411428
self._type_codecs_cache[oid, format] = \
412429
Codec.new_composite_codec(
413-
oid, name, schema, comp_elem_codecs,
414-
comp_type_attrs,
415-
element_names)
430+
oid, name, schema, format, comp_elem_codecs,
431+
comp_type_attrs, element_names)
416432

417433
elif ti['kind'] == b'd':
418434
# Domain type
@@ -424,8 +440,9 @@ cdef class DataCodecConfig:
424440

425441
elem_codec = self.get_codec(base_type, format)
426442
if elem_codec is None:
427-
raise RuntimeError(
428-
'no codec for domain base type {}'.format(base_type))
443+
format = PG_FORMAT_TEXT
444+
elem_codec = self.declare_fallback_codec(
445+
base_type, name, schema)
429446

430447
self._type_codecs_cache[oid, format] = elem_codec
431448

@@ -441,34 +458,18 @@ cdef class DataCodecConfig:
441458
elem_format = PG_FORMAT_BINARY
442459
else:
443460
elem_format = PG_FORMAT_TEXT
461+
444462
elem_codec = self.get_codec(range_subtype_oid, elem_format)
445463
if elem_codec is None:
446-
raise RuntimeError(
447-
'no codec for range element type {}'.format(
448-
range_subtype_oid))
464+
elem_format = PG_FORMAT_TEXT
465+
elem_codec = self.declare_fallback_codec(
466+
range_subtype_oid, name, schema)
449467

450468
self._type_codecs_cache[oid, elem_format] = \
451469
Codec.new_range_codec(oid, name, schema, elem_codec)
452470

453471
else:
454-
if oid <= MAXBUILTINOID:
455-
# This is a non-BKI type, for which ayncpg has no
456-
# defined codec. This should only happen for newly
457-
# added builtin types, for which this version of
458-
# asyncpg is lacking support.
459-
#
460-
raise NotImplementedError(
461-
'unhandled standard data type {!r} (OID {})'.format(
462-
name, oid))
463-
else:
464-
# This is a non-BKI type, and as such, has no
465-
# stable OID, so no possibility of a builtin codec.
466-
# In this case, fallback to text format. Applications
467-
# can avoid this by specifying a codec for this type
468-
# using Connection.set_type_codec().
469-
#
470-
self.set_builtin_type_codec(oid, name, schema, 'scalar',
471-
UNKNOWNOID)
472+
self.declare_fallback_codec(oid, name, schema)
472473

473474
def add_python_codec(self, typeoid, typename, typeschema, typekind,
474475
encoder, decoder, binary):
@@ -478,13 +479,20 @@ cdef class DataCodecConfig:
478479
Codec.new_python_codec(typeoid, typename, typeschema, typekind,
479480
encoder, decoder, format)
480481

482+
self.clear_type_cache()
483+
481484
def set_builtin_type_codec(self, typeoid, typename, typeschema, typekind,
482-
alias_to):
485+
alias_to, format=PG_FORMAT_ANY):
483486
cdef:
484487
Codec codec
485488
Codec target_codec
486489

487-
for format in (PG_FORMAT_BINARY, PG_FORMAT_TEXT):
490+
if format == PG_FORMAT_ANY:
491+
formats = (PG_FORMAT_BINARY, PG_FORMAT_TEXT)
492+
else:
493+
formats = (format,)
494+
495+
for format in formats:
488496
if self.get_codec(typeoid, format) is not None:
489497
raise ValueError('cannot override codec for type {}'.format(
490498
typeoid))
@@ -509,9 +517,41 @@ cdef class DataCodecConfig:
509517
(typeoid, PG_FORMAT_TEXT) not in self._local_type_codecs):
510518
raise ValueError('unknown alias target: {}'.format(alias_to))
511519

520+
self.clear_type_cache()
521+
512522
def clear_type_cache(self):
513523
self._type_codecs_cache.clear()
514524

525+
def declare_fallback_codec(self, uint32_t oid, str name, str schema):
    """Return the text-format codec for *oid*, declaring one if needed.

    If a text codec is already available for *oid* it is returned as is.
    Otherwise, for OIDs above ``MAXBUILTINOID`` the type is registered as
    a scalar alias of ``TEXTOID`` in text format, and the codec looked up
    after that registration is returned.

    :param oid: OID of the type that needs a codec.
    :param name: type name, used for registration and error reporting.
    :param schema: schema of the type, used for registration.

    :raises NotImplementedError: if *oid* is not above ``MAXBUILTINOID``
        and no text codec is available for it.
    """
    cdef Codec existing

    existing = self.get_codec(oid, PG_FORMAT_TEXT)
    if existing is not None:
        return existing

    if oid <= MAXBUILTINOID:
        # This is a BKI type for which asyncpg has no defined codec.
        # That should only happen for newly added builtin types which
        # this version of asyncpg does not support yet.
        raise NotImplementedError(
            'unhandled standard data type {!r} (OID {})'.format(
                name, oid))

    # This is a non-BKI type: it has no stable OID, so there cannot be
    # a builtin codec for it.  Fall back to text format.  Applications
    # can avoid this by specifying a codec for this type using
    # Connection.set_type_codec().
    self.set_builtin_type_codec(oid, name, schema, 'scalar',
                                TEXTOID, PG_FORMAT_TEXT)

    return self.get_codec(oid, PG_FORMAT_TEXT)
554+
515555
cdef inline Codec get_codec(self, uint32_t oid, CodecFormat format):
516556
cdef Codec codec
517557

asyncpg/protocol/codecs/text.pyx

+4
Original file line numberDiff line numberDiff line change
@@ -63,5 +63,9 @@ cdef init_text_codecs():
6363
<decode_func>&text_decode,
6464
PG_FORMAT_BINARY)
6565

66+
register_core_codec(oid,
67+
<encode_func>&text_encode,
68+
<decode_func>&text_decode,
69+
PG_FORMAT_TEXT)
6670

6771
init_text_codecs()

0 commit comments

Comments
 (0)