Skip to content

Commit a18de7a

Browse files
committed
schema: support encoding=None connections
Several different problems are fixed here, but all have the same root cause. When a connection encoding is None (it is the default on Python 2 and may be set explicitly on Python 3), all mp_str values are decoded into bytes, not Unicode strings (note that bytes is an alias for str in Python 2). But the database schema parsing code assumes that _vspace / _vindex values are Unicode strings. The resolved problems are the following: 1. The default encoding in the bytes#decode() method is 'ascii', however names in tarantool can contain symbols outside the ASCII table. Set 'utf-8' for names decoding. 2. Convert all binary values into Unicode strings before parsing or storing them. This allows further correct accesses to the local schema representation. 3. Convert binary parameters like a space, index or field name into Unicode strings when the schema is accessed, so as not to trigger redundant schema refetching. Those problems are briefly mentioned in [1]. Tested manually with Python 2 and Python 3: my testing tarantool instance has a space with the name '©' and after the changes I'm able to connect to it when the connection encoding is set to None. Also I verified that the schema is not fetched each time when I do <connection>.select('©') in Python 2 (where such a string literal is str / bytes, not a Unicode string). Relevant test cases are added in the next commits. [1]: #105 (comment)
1 parent 4f79627 commit a18de7a

File tree

1 file changed

+73
-11
lines changed

1 file changed

+73
-11
lines changed

tarantool/schema.py

+73-11
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,83 @@
1010
integer_types,
1111
)
1212
from tarantool.error import (
13+
Error,
1314
SchemaError,
1415
DatabaseError
1516
)
1617
import tarantool.const as const
1718

1819

20+
class RecursionError(Error):
21+
"""Report the situation when max recursion depth is reached.
22+
23+
This is internal error for <to_unicode_recursive> caller
24+
and it should be re-raised properly by the caller.
25+
"""
26+
27+
28+
def to_unicode(s):
29+
if isinstance(s, bytes):
30+
return s.decode(encoding='utf-8')
31+
return s
32+
33+
34+
def to_unicode_recursive(x, max_depth):
35+
"""Same as to_unicode(), but traverses over dictionaries,
36+
lists and tuples recursively.
37+
38+
x: value to convert
39+
40+
max_depth: 1 accepts a scalar, 2 accepts a list of scalars,
41+
etc.
42+
"""
43+
if max_depth <= 0:
44+
raise RecursionError('Max recursion depth is reached')
45+
46+
if isinstance(x, dict):
47+
res = dict()
48+
for key, val in x.items():
49+
key = to_unicode_recursive(key, max_depth - 1)
50+
val = to_unicode_recursive(val, max_depth - 1)
51+
res[key] = val
52+
return res
53+
54+
if isinstance(x, list) or isinstance(x, tuple):
55+
res = []
56+
for val in x:
57+
val = to_unicode_recursive(val, max_depth - 1)
58+
res.append(val)
59+
if isinstance(x, tuple):
60+
return tuple(res)
61+
return res
62+
63+
return to_unicode(x)
64+
65+
1966
class SchemaIndex(object):
2067
def __init__(self, index_row, space):
2168
self.iid = index_row[1]
2269
self.name = index_row[2]
23-
if isinstance(self.name, bytes):
24-
self.name = self.name.decode()
70+
self.name = to_unicode(index_row[2])
2571
self.index = index_row[3]
2672
self.unique = index_row[4]
2773
self.parts = []
28-
if isinstance(index_row[5], (list, tuple)):
29-
for val in index_row[5]:
74+
try:
75+
parts_raw = to_unicode_recursive(index_row[5], 3)
76+
except RecursionError as e:
77+
errmsg = 'Unexpected index parts structure: ' + str(e)
78+
raise SchemaError(errmsg)
79+
if isinstance(parts_raw, (list, tuple)):
80+
for val in parts_raw:
3081
if isinstance(val, dict):
3182
self.parts.append((val['field'], val['type']))
3283
else:
3384
self.parts.append((val[0], val[1]))
3485
else:
35-
for i in range(index_row[5]):
86+
for i in range(parts_raw):
3687
self.parts.append((
37-
index_row[5 + 1 + i * 2],
38-
index_row[5 + 2 + i * 2]
88+
to_unicode(index_row[5 + 1 + i * 2]),
89+
to_unicode(index_row[5 + 2 + i * 2])
3990
))
4091
self.space = space
4192
self.space.indexes[self.iid] = self
@@ -52,16 +103,19 @@ class SchemaSpace(object):
52103
def __init__(self, space_row, schema):
53104
self.sid = space_row[0]
54105
self.arity = space_row[1]
55-
self.name = space_row[2]
56-
if isinstance(self.name, bytes):
57-
self.name = self.name.decode()
106+
self.name = to_unicode(space_row[2])
58107
self.indexes = {}
59108
self.schema = schema
60109
self.schema[self.sid] = self
61110
if self.name:
62111
self.schema[self.name] = self
63112
self.format = dict()
64-
for part_id, part in enumerate(space_row[6]):
113+
try:
114+
format_raw = to_unicode_recursive(space_row[6], 3)
115+
except RecursionError as e:
116+
errmsg = 'Unexpected space format structure: ' + str(e)
117+
raise SchemaError(errmsg)
118+
for part_id, part in enumerate(format_raw):
65119
part['id'] = part_id
66120
self.format[part['name']] = part
67121
self.format[part_id ] = part
@@ -78,6 +132,8 @@ def __init__(self, con):
78132
self.con = con
79133

80134
def get_space(self, space):
135+
space = to_unicode(space)
136+
81137
try:
82138
return self.schema[space]
83139
except KeyError:
@@ -135,6 +191,9 @@ def fetch_space_all(self):
135191
SchemaSpace(row, self.schema)
136192

137193
def get_index(self, space, index):
194+
space = to_unicode(space)
195+
index = to_unicode(index)
196+
138197
_space = self.get_space(space)
139198
try:
140199
return _space.indexes[index]
@@ -203,6 +262,9 @@ def fetch_index_from(self, space, index):
203262
return index_row
204263

205264
def get_field(self, space, field):
265+
space = to_unicode(space)
266+
field = to_unicode(field)
267+
206268
_space = self.get_space(space)
207269
try:
208270
return _space.format[field]

0 commit comments

Comments
 (0)