@@ -116,12 +116,12 @@ def _length_check(others):
116
116
return n
117
117
118
118
119
- def _na_map (f , arr , na_result = np .nan , dtype = object ):
119
+ def _na_map (f , arr , na_result = np .nan , dtype = object , np_f = None ):
120
120
# Thin wrapper: delegates to _map with na_mask=True, forwarding na_result as na_value and np_f unchanged. Should really _check_ for NA.
121
- return _map (f , arr , na_mask = True , na_value = na_result , dtype = dtype )
121
+ return _map (f , arr , na_mask = True , na_value = na_result , dtype = dtype , np_f = np_f )
122
122
123
123
124
- def _map (f , arr , na_mask = False , na_value = np .nan , dtype = object ):
124
+ def _map (f , arr , na_mask = False , na_value = np .nan , dtype = object , np_f = None ):
125
125
from pandas .core .series import Series
126
126
127
127
if not len (arr ):
@@ -131,6 +131,14 @@ def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
131
131
arr = arr .values
132
132
if not isinstance (arr , np .ndarray ):
133
133
arr = np .asarray (arr , dtype = object )
134
+
135
+ # short path for all-string array. NOTE(review): arr has already been converted to an ndarray above, so `arr.values` below raises AttributeError (unless arr is an ndarray subclass exposing .values), and the broad `except Exception` swallows it -- this short path looks dead as written; confirm before relying on np_f.
136
+ if np_f is not None and lib .is_string_array (arr ):
137
+ try :
138
+ return np_f (arr .values .astype (unicode ))
139
+ except Exception :
140
+ pass
141
+
134
142
if na_mask :
135
143
mask = isnull (arr )
136
144
try :
@@ -686,14 +694,17 @@ def str_pad(arr, width, side='left', fillchar=' '):
686
694
687
695
if side == 'left' :
688
696
f = lambda x : x .rjust (width , fillchar )
697
+ np_f = lambda x : np .core .defchararray .ljust (x , width , fillchar )
689
698
elif side == 'right' :
690
699
f = lambda x : x .ljust (width , fillchar )
700
+ np_f = lambda x : np .core .defchararray .rjust (x , width , fillchar )
691
701
elif side == 'both' :
692
702
f = lambda x : x .center (width , fillchar )
703
+ np_f = lambda x : np .core .defchararray .lower (x , width , fillchar )
693
704
else : # pragma: no cover
694
705
raise ValueError ('Invalid side' )
695
706
696
- return _na_map (f , arr )
707
+ return _na_map (f , arr , np_f = np_f )
697
708
698
709
699
710
def str_split (arr , pat = None , n = None ):
@@ -720,17 +731,21 @@ def str_split(arr, pat=None, n=None):
720
731
if n is None or n == 0 :
721
732
n = - 1
722
733
f = lambda x : x .split (pat , n )
734
+ np_f = lambda x : np .core .defchararray .split (x , pat , n )
723
735
else :
724
736
if len (pat ) == 1 :
725
737
if n is None or n == 0 :
726
738
n = - 1
727
739
f = lambda x : x .split (pat , n )
740
+ np_f = lambda x : np .core .defchararray .split (x , pat , n )
728
741
else :
729
742
if n is None or n == - 1 :
730
743
n = 0
731
744
regex = re .compile (pat )
732
745
f = lambda x : regex .split (x , maxsplit = n )
733
- res = _na_map (f , arr )
746
+ # numpy doesn't support regex
747
+ np_f = None
748
+ res = _na_map (f , arr , np_f = np_f )
734
749
return res
735
750
736
751
@@ -946,7 +961,8 @@ def str_decode(arr, encoding, errors="strict"):
946
961
decoded : Series/Index of objects
947
962
"""
948
963
f = lambda x : x .decode (encoding , errors )
949
- return _na_map (f , arr )
964
+ np_f = lambda x : np .core .defchararray .decode (x , errors )
965
+ return _na_map (f , arr , np_f = np_f )
950
966
951
967
952
968
def str_encode (arr , encoding , errors = "strict" ):
@@ -964,12 +980,13 @@ def str_encode(arr, encoding, errors="strict"):
964
980
encoded : Series/Index of objects
965
981
"""
966
982
f = lambda x : x .encode (encoding , errors )
967
- return _na_map (f , arr )
983
+ np_f = lambda x : np .core .defchararray .encode (x , errors )
984
+ return _na_map (f , arr , np_f = np_f )
968
985
969
986
970
- def _noarg_wrapper (f , docstring = None , ** kargs ):
987
+ def _noarg_wrapper (f , docstring = None , np_f = None , ** kargs ):
971
988
def wrapper (self ):
972
- result = _na_map (f , self .series , ** kargs )
989
+ result = _na_map (f , self .series , np_f = np_f , ** kargs )
973
990
return self ._wrap_result (result )
974
991
975
992
wrapper .__name__ = f .__name__
@@ -1443,7 +1460,8 @@ def rindex(self, sub, start=0, end=None):
1443
1460
_shared_docs ['swapcase' ] = dict (type = 'be swapcased' , method = 'swapcase' )
1444
1461
lower = _noarg_wrapper (lambda x : x .lower (),
1445
1462
docstring = _shared_docs ['casemethods' ] %
1446
- _shared_docs ['lower' ])
1463
+ _shared_docs ['lower' ],
1464
+ np_f = np .core .defchararray .lower )
1447
1465
upper = _noarg_wrapper (lambda x : x .upper (),
1448
1466
docstring = _shared_docs ['casemethods' ] %
1449
1467
_shared_docs ['upper' ])
@@ -1452,7 +1470,8 @@ def rindex(self, sub, start=0, end=None):
1452
1470
_shared_docs ['title' ])
1453
1471
capitalize = _noarg_wrapper (lambda x : x .capitalize (),
1454
1472
docstring = _shared_docs ['casemethods' ] %
1455
- _shared_docs ['capitalize' ])
1473
+ _shared_docs ['capitalize' ],
1474
+ np_f = np .core .defchararray .capitalize )
1456
1475
swapcase = _noarg_wrapper (lambda x : x .swapcase (),
1457
1476
docstring = _shared_docs ['casemethods' ] %
1458
1477
_shared_docs ['swapcase' ])
0 commit comments