Skip to content

Commit 38f96dd

Browse files
sreejata authored and sreejata committed
Fix pandas-dev#9847: accept "same" and "expand" as additional values of the return_type parameter of StringMethods.split()
1 parent 35b20d8 commit 38f96dd

File tree

2 files changed

+11
-7
lines changed

2 files changed

+11
-7
lines changed

pandas/core/strings.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -632,9 +632,9 @@ def str_split(arr, pat=None, n=None, return_type='series'):
632632
pat : string, default None
633633
String or regular expression to split on. If None, splits on whitespace
634634
n : int, default None (all)
635-
return_type : {'series', 'index', 'frame'}, default 'series'
636-
If frame, returns a DataFrame (elements are strings)
637-
If series or index, returns the same type as the original object
635+
return_type : {'series', 'index', 'frame', 'same', 'expand'}, default 'series'
636+
If frame or expand, returns a DataFrame (elements are strings)
637+
If series, index or same, returns the same type as the original object
638638
(elements are lists of strings).
639639
640640
Notes
@@ -649,9 +649,9 @@ def str_split(arr, pat=None, n=None, return_type='series'):
649649
from pandas.core.frame import DataFrame
650650
from pandas.core.index import Index
651651

652-
if return_type not in ('series', 'index', 'frame'):
653-
raise ValueError("return_type must be {'series', 'index', 'frame'}")
654-
if return_type == 'frame' and isinstance(arr, Index):
652+
if return_type not in ('series', 'index', 'frame', 'same', 'expand'):
653+
raise ValueError("return_type must be {'series', 'index', 'frame', 'same', 'expand'}")
654+
if return_type in ['frame', 'expand'] and isinstance(arr, Index):
655655
raise ValueError("return_type='frame' is not supported for string "
656656
"methods on Index")
657657
if pat is None:
@@ -668,7 +668,7 @@ def str_split(arr, pat=None, n=None, return_type='series'):
668668
n = 0
669669
regex = re.compile(pat)
670670
f = lambda x: regex.split(x, maxsplit=n)
671-
if return_type == 'frame':
671+
if return_type == 'frame' or return_type == 'expand':
672672
res = DataFrame((Series(x) for x in _na_map(f, arr)), index=arr.index)
673673
else:
674674
res = _na_map(f, arr)

pandas/tests/test_index.py

+4
Original file line numberDiff line numberDiff line change
@@ -1220,8 +1220,12 @@ def test_str_attribute(self):
12201220
tm.assert_index_equal(idx.str.split(return_type='series'), expected)
12211221
# return_type 'index' is an alias for 'series'
12221222
tm.assert_index_equal(idx.str.split(return_type='index'), expected)
1223+
# return_type 'same' is an alias for 'series' and 'index'
1224+
tm.assert_index_equal(idx.str.split(return_type='same'), expected)
12231225
with self.assertRaisesRegexp(ValueError, 'not supported'):
12241226
idx.str.split(return_type='frame')
1227+
with self.assertRaisesRegexp(ValueError, 'not supported'):
1228+
idx.str.split(return_type='expand')
12251229

12261230
# test boolean case, should return np.array instead of boolean Index
12271231
idx = Index(['a1', 'a2', 'b1', 'b2'])

0 commit comments

Comments
 (0)