Skip to content

Commit 8609f6c

Browse files
committed
clean up string conversions
1 parent b268bb0 commit 8609f6c

File tree

1 file changed

+37
-15
lines changed

1 file changed

+37
-15
lines changed

pandas/io/pytables.py

+37 -15
Original file line number | Diff line number | Diff line change
@@ -4390,11 +4390,23 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None):
43904390

43914391

43924392
def _convert_string_array(data, encoding, itemsize=None):
4393+
"""
4394+
we take a string-like that is object dtype and coerce to a fixed size string type
4395+
4396+
Parameters
4397+
----------
4398+
data : a numpy array of object dtype
4399+
encoding : None or string-encoding
4400+
itemsize : integer, optional, defaults to the max length of the strings
4401+
4402+
Returns
4403+
-------
4404+
data in a fixed-length string dtype, encoded to bytes if needed
4405+
"""
43934406

43944407
# encode if needed
43954408
if encoding is not None and len(data):
4396-
f = np.vectorize(lambda x: x.encode(encoding), otypes=[np.object])
4397-
data = f(data)
4409+
data = Series(data.ravel()).str.encode(encoding).values.reshape(data.shape)
43984410

43994411
# create the sized dtype
44004412
if itemsize is None:
@@ -4404,7 +4416,20 @@ def _convert_string_array(data, encoding, itemsize=None):
44044416
return data
44054417

44064418
def _unconvert_string_array(data, nan_rep=None, encoding=None):
4407-
""" deserialize a string array, possibly decoding """
4419+
"""
4420+
inverse of _convert_string_array
4421+
4422+
Parameters
4423+
----------
4424+
data : fixed length string dtyped array
4425+
nan_rep : the storage repr of NaN, optional
4426+
encoding : the encoding of the data, optional
4427+
4428+
Returns
4429+
-------
4430+
an object array of the decoded data
4431+
4432+
"""
44084433
shape = data.shape
44094434
data = np.asarray(data.ravel(), dtype=object)
44104435

@@ -4413,19 +4438,16 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
44134438
encoding = _ensure_encoding(encoding)
44144439
if encoding is not None and len(data):
44154440

4416-
try:
4417-
itemsize = lib.max_len_string_array(com._ensure_object(data.ravel()))
4418-
if compat.PY3:
4419-
dtype = "U{0}".format(itemsize)
4420-
else:
4421-
dtype = "S{0}".format(itemsize)
4422-
# fix? issue #10366
4423-
data = _convert_string_array(data, _ensure_encoding(encoding),
4424-
itemsize=itemsize)
4441+
itemsize = lib.max_len_string_array(com._ensure_object(data))
4442+
if compat.PY3:
4443+
dtype = "U{0}".format(itemsize)
4444+
else:
4445+
dtype = "S{0}".format(itemsize)
4446+
4447+
if isinstance(data[0], compat.binary_type):
4448+
data = Series(data).str.decode(encoding).values
4449+
else:
44254450
data = data.astype(dtype, copy=False).astype(object, copy=False)
4426-
except (Exception) as e:
4427-
f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
4428-
data = f(data)
44294451

44304452
if nan_rep is None:
44314453
nan_rep = 'nan'

0 commit comments

Comments
 (0)