@@ -4390,11 +4390,23 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None):
4390
4390
4391
4391
4392
4392
def _convert_string_array (data , encoding , itemsize = None ):
4393
+ """
4394
+ Take a string-like array of object dtype and coerce it to a fixed-size string type.
4395
+
4396
+ Parameters
4397
+ ----------
4398
+ data : a numpy array of object dtype
4399
+ encoding : None or string-encoding
4400
+ itemsize : integer, optional, defaults to the max length of the strings
4401
+
4402
+ Returns
4403
+ -------
4404
+ data in a fixed-length string dtype, encoded to bytes if needed
4405
+ """
4393
4406
4394
4407
# encode if needed
4395
4408
if encoding is not None and len (data ):
4396
- f = np .vectorize (lambda x : x .encode (encoding ), otypes = [np .object ])
4397
- data = f (data )
4409
+ data = Series (data .ravel ()).str .encode (encoding ).values .reshape (data .shape )
4398
4410
4399
4411
# create the sized dtype
4400
4412
if itemsize is None :
@@ -4404,7 +4416,20 @@ def _convert_string_array(data, encoding, itemsize=None):
4404
4416
return data
4405
4417
4406
4418
def _unconvert_string_array (data , nan_rep = None , encoding = None ):
4407
- """ deserialize a string array, possibly decoding """
4419
+ """
4420
+ inverse of _convert_string_array
4421
+
4422
+ Parameters
4423
+ ----------
4424
+ data : array with a fixed-length string dtype
4425
+ nan_rep : the storage repr of NaN, optional
4426
+ encoding : the encoding of the data, optional
4427
+
4428
+ Returns
4429
+ -------
4430
+ an object array of the decoded data
4431
+
4432
+ """
4408
4433
shape = data .shape
4409
4434
data = np .asarray (data .ravel (), dtype = object )
4410
4435
@@ -4413,19 +4438,16 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
4413
4438
encoding = _ensure_encoding (encoding )
4414
4439
if encoding is not None and len (data ):
4415
4440
4416
- try :
4417
- itemsize = lib . max_len_string_array ( com . _ensure_object ( data . ravel ()))
4418
- if compat . PY3 :
4419
- dtype = "U{0}" . format ( itemsize )
4420
- else :
4421
- dtype = "S{0}" . format ( itemsize )
4422
- # fix? issue #10366
4423
- data = _convert_string_array (data , _ensure_encoding (encoding ),
4424
- itemsize = itemsize )
4441
+ itemsize = lib . max_len_string_array ( com . _ensure_object ( data ))
4442
+ if compat . PY3 :
4443
+ dtype = "U{0}" . format ( itemsize )
4444
+ else :
4445
+ dtype = "S{0}" . format ( itemsize )
4446
+
4447
+ if isinstance ( data [ 0 ], compat . binary_type ):
4448
+ data = Series (data ). str . decode (encoding ). values
4449
+ else :
4425
4450
data = data .astype (dtype , copy = False ).astype (object , copy = False )
4426
- except (Exception ) as e :
4427
- f = np .vectorize (lambda x : x .decode (encoding ), otypes = [np .object ])
4428
- data = f (data )
4429
4451
4430
4452
if nan_rep is None :
4431
4453
nan_rep = 'nan'
0 commit comments