diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6d20907373014..87169a5314643 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -624,6 +624,7 @@ def str_pad(arr, width, side='left', fillchar=' '): def str_split(arr, pat=None, n=None, return_type='series'): """ + Deprecated: return_types 'series', 'index', 'frame' are now deprecated Split each string (a la re.split) in array by given pattern, propagating NA values @@ -632,9 +633,9 @@ def str_split(arr, pat=None, n=None, return_type='series'): pat : string, default None String or regular expression to split on. If None, splits on whitespace n : int, default None (all) - return_type : {'series', 'index', 'frame'}, default 'series' - If frame, returns a DataFrame (elements are strings) - If series or index, returns the same type as the original object + return_type : {'same', 'expand'}, default 'series' + If expand, returns a DataFrame (elements are strings) + If series, index or same, returns the same type as the original object (elements are lists of strings). Notes @@ -649,11 +650,14 @@ def str_split(arr, pat=None, n=None, return_type='series'): from pandas.core.frame import DataFrame from pandas.core.index import Index - if return_type not in ('series', 'index', 'frame'): - raise ValueError("return_type must be {'series', 'index', 'frame'}") - if return_type == 'frame' and isinstance(arr, Index): + if return_type not in ('series', 'index', 'frame', 'same', 'expand'): + raise ValueError("return_type must be {'series', 'index', 'frame', 'same', 'expand'}") + if return_type in ('frame', 'expand') and isinstance(arr, Index): raise ValueError("return_type='frame' is not supported for string " "methods on Index") + if return_type in ('series', 'index', 'frame'): + warnings.warn(("'series', 'index' and 'frame' are deprecated. Please use 'same' or 'expand' instead"), + FutureWarning) if pat is None: if n is None or n == 0: n = -1 @@ -668,7 +672,7 @@ def str_split(arr, pat=None, n=None, return_type='series'): n = 0 regex = re.compile(pat) f = lambda x: regex.split(x, maxsplit=n) - if return_type == 'frame': + if return_type in ('frame', 'expand'): res = DataFrame((Series(x) for x in _na_map(f, arr)), index=arr.index) else: res = _na_map(f, arr) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index bb75b12754dca..159635da5d505 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1220,9 +1220,12 @@ def test_str_attribute(self): tm.assert_index_equal(idx.str.split(return_type='series'), expected) # return_type 'index' is an alias for 'series' tm.assert_index_equal(idx.str.split(return_type='index'), expected) + # return_type 'same' is an alias for 'series' and 'index' + tm.assert_index_equal(idx.str.split(return_type='same'), expected) with self.assertRaisesRegexp(ValueError, 'not supported'): idx.str.split(return_type='frame') - + with self.assertRaisesRegexp(ValueError, 'not supported'): + idx.str.split(return_type='expand') # test boolean case, should return np.array instead of boolean Index idx = Index(['a1', 'a2', 'b1', 'b2']) expected = np.array([True, True, False, False])