Skip to content

Commit 65498cc

Browse files
committed
schema: support encoding=None connections
Several different problems are fixed here, but all have the same root cause. When a connection's encoding is None (this is the default on Python 2 and may be set explicitly on Python 3), all mp_str values are decoded into bytes, not Unicode strings (note that bytes is an alias for str in Python 2). However, the database schema parsing code assumes that _vspace / _vindex values are Unicode strings. The resolved problems are the following: 1. The default encoding in the bytes#decode() method is 'ascii'; however, names in tarantool can contain characters outside the ASCII range. Use 'utf-8' for decoding names. 2. Convert all binary values into Unicode strings before parsing or storing them. This allows correct subsequent accesses to the local schema representation. 3. Convert binary parameters such as a space, index or field name into Unicode strings when the schema is accessed, so as not to trigger redundant schema refetching. These problems are briefly mentioned in [1]. Tested manually with Python 2 and Python 3: my test tarantool instance has a space named '©', and after the changes I am able to connect to it when the connection encoding is set to None. I also verified that the schema is not fetched each time I call <connection>.select('©') in Python 2 (where such a string literal is str / bytes, not a Unicode string). [1]: #105 (comment)
1 parent 4f79627 commit 65498cc

File tree

1 file changed

+29
-4
lines changed

1 file changed

+29
-4
lines changed

tarantool/schema.py

+29-4
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@ def __init__(self, index_row, space):
2121
self.iid = index_row[1]
2222
self.name = index_row[2]
2323
if isinstance(self.name, bytes):
24-
self.name = self.name.decode()
24+
self.name = self.name.decode(encoding='utf-8')
2525
self.index = index_row[3]
2626
self.unique = index_row[4]
2727
self.parts = []
2828
if isinstance(index_row[5], (list, tuple)):
2929
for val in index_row[5]:
3030
if isinstance(val, dict):
31-
self.parts.append((val['field'], val['type']))
31+
val_field = val.get('field', val.get(b'field'))
32+
val_type = val.get('type', val.get(b'type'))
33+
self.parts.append((val_field, val_type))
3234
else:
3335
self.parts.append((val[0], val[1]))
3436
else:
@@ -54,14 +56,24 @@ def __init__(self, space_row, schema):
5456
self.arity = space_row[1]
5557
self.name = space_row[2]
5658
if isinstance(self.name, bytes):
57-
self.name = self.name.decode()
59+
self.name = self.name.decode(encoding='utf-8')
5860
self.indexes = {}
5961
self.schema = schema
6062
self.schema[self.sid] = self
6163
if self.name:
6264
self.schema[self.name] = self
6365
self.format = dict()
64-
for part_id, part in enumerate(space_row[6]):
66+
for part_id, raw_part in enumerate(space_row[6]):
67+
# Convert keys and values from bytes to str.
68+
# It is necessary for an encoding=None connection.
69+
part = dict()
70+
for key, val in raw_part.items():
71+
if isinstance(key, bytes):
72+
key = key.decode(encoding='utf-8')
73+
if isinstance(val, bytes):
74+
val = val.decode(encoding='utf-8')
75+
part[key] = val
76+
6577
part['id'] = part_id
6678
self.format[part['name']] = part
6779
self.format[part_id ] = part
@@ -78,6 +90,9 @@ def __init__(self, con):
7890
self.con = con
7991

8092
def get_space(self, space):
93+
if isinstance(space, bytes):
94+
space = space.decode(encoding='utf-8')
95+
8196
try:
8297
return self.schema[space]
8398
except KeyError:
@@ -135,6 +150,11 @@ def fetch_space_all(self):
135150
SchemaSpace(row, self.schema)
136151

137152
def get_index(self, space, index):
153+
if isinstance(space, bytes):
154+
space = space.decode(encoding='utf-8')
155+
if isinstance(index, bytes):
156+
index = index.decode(encoding='utf-8')
157+
138158
_space = self.get_space(space)
139159
try:
140160
return _space.indexes[index]
@@ -203,6 +223,11 @@ def fetch_index_from(self, space, index):
203223
return index_row
204224

205225
def get_field(self, space, field):
226+
if isinstance(space, bytes):
227+
space = space.decode(encoding='utf-8')
228+
if isinstance(field, bytes):
229+
field = field.decode(encoding='utf-8')
230+
206231
_space = self.get_space(space)
207232
try:
208233
return _space.format[field]

0 commit comments

Comments
 (0)