3
3
from pandas .compat import zip
4
4
from pandas .core .common import isnull , _values_from_object , is_bool_dtype
5
5
import pandas .compat as compat
6
- from pandas .util .decorators import Appender , deprecate_kwarg
6
+ from pandas .util .decorators import Appender
7
7
import re
8
8
import pandas .lib as lib
9
9
import warnings
@@ -638,26 +638,6 @@ def str_find(arr, sub, start=0, end=None, side='left'):
638
638
return _na_map (f , arr , dtype = int )
639
639
640
640
641
- def str_index (arr , sub , start = 0 , end = None , side = 'left' ):
642
- if not isinstance (sub , compat .string_types ):
643
- msg = 'expected a string object, not {0}'
644
- raise TypeError (msg .format (type (sub ).__name__ ))
645
-
646
- if side == 'left' :
647
- method = 'index'
648
- elif side == 'right' :
649
- method = 'rindex'
650
- else : # pragma: no cover
651
- raise ValueError ('Invalid side' )
652
-
653
- if end is None :
654
- f = lambda x : getattr (x , method )(sub , start )
655
- else :
656
- f = lambda x : getattr (x , method )(sub , start , end )
657
-
658
- return _na_map (f , arr , dtype = int )
659
-
660
-
661
641
def str_pad (arr , width , side = 'left' , fillchar = ' ' ):
662
642
"""
663
643
Pad strings in the Series/Index with an additional character to
@@ -696,7 +676,7 @@ def str_pad(arr, width, side='left', fillchar=' '):
696
676
return _na_map (f , arr )
697
677
698
678
699
- def str_split (arr , pat = None , n = None ):
679
+ def str_split (arr , pat = None , n = None , return_type = 'series' ):
700
680
"""
701
681
Split each string (a la re.split) in the Series/Index by given
702
682
pattern, propagating NA values. Equivalent to :meth:`str.split`.
@@ -705,17 +685,29 @@ def str_split(arr, pat=None, n=None):
705
685
----------
706
686
pat : string, default None
707
687
String or regular expression to split on. If None, splits on whitespace
708
- n : int, default -1 (all)
709
- None, 0 and -1 will be interpreted as return all splits
710
- expand : bool, default False
711
- * If True, return DataFrame/MultiIndex expanding dimensionality.
712
- * If False, return Series/Index.
713
- return_type : deprecated, use `expand`
688
+ n : int, default None (all)
689
+ return_type : {'series', 'index', 'frame'}, default 'series'
690
+ If frame, returns a DataFrame (elements are strings)
691
+ If series or index, returns the same type as the original object
692
+ (elements are lists of strings).
693
+
694
+ Notes
695
+ -----
696
+ Both 0 and -1 will be interpreted as return all splits
714
697
715
698
Returns
716
699
-------
717
- split : Series/Index or DataFrame/MultiIndex of objects
700
+ split : Series/Index of objects or DataFrame
718
701
"""
702
+ from pandas .core .series import Series
703
+ from pandas .core .frame import DataFrame
704
+ from pandas .core .index import Index
705
+
706
+ if return_type not in ('series' , 'index' , 'frame' ):
707
+ raise ValueError ("return_type must be {'series', 'index', 'frame'}" )
708
+ if return_type == 'frame' and isinstance (arr , Index ):
709
+ raise ValueError ("return_type='frame' is not supported for string "
710
+ "methods on Index" )
719
711
if pat is None :
720
712
if n is None or n == 0 :
721
713
n = - 1
@@ -730,7 +722,10 @@ def str_split(arr, pat=None, n=None):
730
722
n = 0
731
723
regex = re .compile (pat )
732
724
f = lambda x : regex .split (x , maxsplit = n )
733
- res = _na_map (f , arr )
725
+ if return_type == 'frame' :
726
+ res = DataFrame ((x for x in _na_map (f , arr )), index = arr .index )
727
+ else :
728
+ res = _na_map (f , arr )
734
729
return res
735
730
736
731
@@ -813,7 +808,7 @@ def str_strip(arr, to_strip=None, side='both'):
813
808
814
809
815
810
def str_wrap (arr , width , ** kwargs ):
816
- r """
811
+ """
817
812
Wrap long strings in the Series/Index to be formatted in
818
813
paragraphs with length less than a given width.
819
814
@@ -875,44 +870,6 @@ def str_wrap(arr, width, **kwargs):
875
870
return _na_map (lambda s : '\n ' .join (tw .wrap (s )), arr )
876
871
877
872
878
- def str_translate (arr , table , deletechars = None ):
879
- """
880
- Map all characters in the string through the given mapping table.
881
- Equivalent to standard :meth:`str.translate`. Note that the optional
882
- argument deletechars is only valid if you are using python 2. For python 3,
883
- character deletion should be specified via the table argument.
884
-
885
- Parameters
886
- ----------
887
- table : dict (python 3), str or None (python 2)
888
- In python 3, table is a mapping of Unicode ordinals to Unicode ordinals,
889
- strings, or None. Unmapped characters are left untouched. Characters
890
- mapped to None are deleted. :meth:`str.maketrans` is a helper function
891
- for making translation tables.
892
- In python 2, table is either a string of length 256 or None. If the
893
- table argument is None, no translation is applied and the operation
894
- simply removes the characters in deletechars. :func:`string.maketrans`
895
- is a helper function for making translation tables.
896
- deletechars : str, optional (python 2)
897
- A string of characters to delete. This argument is only valid
898
- in python 2.
899
-
900
- Returns
901
- -------
902
- translated : Series/Index of objects
903
- """
904
- if deletechars is None :
905
- f = lambda x : x .translate (table )
906
- else :
907
- from pandas import compat
908
- if compat .PY3 :
909
- raise ValueError ("deletechars is not a valid argument for "
910
- "str.translate in python 3. You should simply "
911
- "specify character deletions in the table argument" )
912
- f = lambda x : x .translate (table , deletechars )
913
- return _na_map (f , arr )
914
-
915
-
916
873
def str_get (arr , i ):
917
874
"""
918
875
Extract element from lists, tuples, or strings in each element in the
@@ -1044,7 +1001,6 @@ def __iter__(self):
1044
1001
g = self .get (i )
1045
1002
1046
1003
def _wrap_result (self , result , ** kwargs ):
1047
-
1048
1004
# leave as it is to keep extract and get_dummies results
1049
1005
# can be merged to _wrap_result_expand in v0.17
1050
1006
from pandas .core .series import Series
@@ -1068,10 +1024,7 @@ def _wrap_result(self, result, **kwargs):
1068
1024
return DataFrame (result , index = self .series .index )
1069
1025
1070
1026
def _wrap_result_expand (self , result , expand = False ):
1071
- if not isinstance (expand , bool ):
1072
- raise ValueError ("expand must be True or False" )
1073
-
1074
- from pandas .core .index import Index , MultiIndex
1027
+ from pandas .core .index import Index
1075
1028
if not hasattr (result , 'ndim' ):
1076
1029
return result
1077
1030
@@ -1084,16 +1037,13 @@ def _wrap_result_expand(self, result, expand=False):
1084
1037
1085
1038
if expand :
1086
1039
result = list (result )
1087
- return MultiIndex .from_tuples (result , names = name )
1088
- else :
1089
- return Index (result , name = name )
1040
+ return Index (result , name = name )
1090
1041
else :
1091
1042
index = self .series .index
1092
1043
if expand :
1093
- cons_row = self .series ._constructor
1094
1044
cons = self .series ._constructor_expanddim
1095
- data = [cons_row ( x ) for x in result ]
1096
- return cons (data , index = index )
1045
+ data = [x if ( x is not np . nan ) else [ None ] for x in result ]
1046
+ return cons (data , index = index ). fillna ( np . nan )
1097
1047
else :
1098
1048
name = getattr (result , 'name' , None )
1099
1049
cons = self .series ._constructor
@@ -1104,12 +1054,10 @@ def cat(self, others=None, sep=None, na_rep=None):
1104
1054
result = str_cat (self .series , others = others , sep = sep , na_rep = na_rep )
1105
1055
return self ._wrap_result (result )
1106
1056
1107
- @deprecate_kwarg ('return_type' , 'expand' ,
1108
- mapping = {'series' : False , 'frame' : True })
1109
1057
@copy (str_split )
1110
- def split (self , pat = None , n = - 1 , expand = False ):
1111
- result = str_split (self .series , pat , n = n )
1112
- return self ._wrap_result_expand (result , expand = expand )
1058
+ def split (self , pat = None , n = - 1 , return_type = 'series' ):
1059
+ result = str_split (self .series , pat , n = n , return_type = return_type )
1060
+ return self ._wrap_result (result )
1113
1061
1114
1062
_shared_docs ['str_partition' ] = ("""
1115
1063
Split the string at the %(side)s occurrence of `sep`, and return 3 elements
@@ -1123,7 +1071,7 @@ def split(self, pat=None, n=-1, expand=False):
1123
1071
String to split on.
1124
1072
expand : bool, default True
1125
1073
* If True, return DataFrame/MultiIndex expanding dimensionality.
1126
- * If False, return Series/Index.
1074
+ * If False, return Series/Index
1127
1075
1128
1076
Returns
1129
1077
-------
@@ -1313,11 +1261,6 @@ def get_dummies(self, sep='|'):
1313
1261
result = str_get_dummies (self .series , sep )
1314
1262
return self ._wrap_result (result )
1315
1263
1316
- @copy (str_translate )
1317
- def translate (self , table , deletechars = None ):
1318
- result = str_translate (self .series , table , deletechars )
1319
- return self ._wrap_result (result )
1320
-
1321
1264
count = _pat_wrapper (str_count , flags = True )
1322
1265
startswith = _pat_wrapper (str_startswith , na = True )
1323
1266
endswith = _pat_wrapper (str_endswith , na = True )
@@ -1382,42 +1325,6 @@ def normalize(self, form):
1382
1325
result = _na_map (f , self .series )
1383
1326
return self ._wrap_result (result )
1384
1327
1385
- _shared_docs ['index' ] = ("""
1386
- Return %(side)s indexes in each strings where the substring is
1387
- fully contained between [start:end]. This is the same as ``str.%(similar)s``
1388
- except instead of returning -1, it raises a ValueError when the substring
1389
- is not found. Equivalent to standard ``str.%(method)s``.
1390
-
1391
- Parameters
1392
- ----------
1393
- sub : str
1394
- Substring being searched
1395
- start : int
1396
- Left edge index
1397
- end : int
1398
- Right edge index
1399
-
1400
- Returns
1401
- -------
1402
- found : Series/Index of objects
1403
-
1404
- See Also
1405
- --------
1406
- %(also)s
1407
- """ )
1408
-
1409
- @Appender (_shared_docs ['index' ] % dict (side = 'lowest' , similar = 'find' , method = 'index' ,
1410
- also = 'rindex : Return highest indexes in each strings' ))
1411
- def index (self , sub , start = 0 , end = None ):
1412
- result = str_index (self .series , sub , start = start , end = end , side = 'left' )
1413
- return self ._wrap_result (result )
1414
-
1415
- @Appender (_shared_docs ['index' ] % dict (side = 'highest' , similar = 'rfind' , method = 'rindex' ,
1416
- also = 'index : Return lowest indexes in each strings' ))
1417
- def rindex (self , sub , start = 0 , end = None ):
1418
- result = str_index (self .series , sub , start = start , end = end , side = 'right' )
1419
- return self ._wrap_result (result )
1420
-
1421
1328
_shared_docs ['len' ] = ("""
1422
1329
Compute length of each string in the Series/Index.
1423
1330
0 commit comments