Skip to content

BUG: .str methods with expand=True may raise ValueError if input has name #12843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ Bug Fixes



- Bug in ``.str`` accessor methods that could raise ``ValueError`` if the input has a ``name`` and the result is a ``DataFrame`` or ``MultiIndex`` (:issue:`12617`)


- Bug in ``CategoricalIndex.get_loc`` returning a different result from a regular ``Index`` (:issue:`12531`)
Expand Down
15 changes: 9 additions & 6 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1329,12 +1329,15 @@ def cons_row(x):
if not isinstance(expand, bool):
raise ValueError("expand must be True or False")

if name is None:
name = getattr(result, 'name', None)
if name is None:
# do not use logical or, _orig may be a DataFrame
# which has "name" column
name = self._orig.name
if expand is False:
# if expand is False, result should have the same name
# as the original unless otherwise specified
if name is None:
name = getattr(result, 'name', None)
if name is None:
# do not use logical or, _orig may be a DataFrame
# which has "name" column
name = self._orig.name

# Wait until we are sure result is a Series or Index before
# checking attributes (GH 12180)
Expand Down
49 changes: 49 additions & 0 deletions pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1938,6 +1938,30 @@ def test_rsplit_to_multiindex_expand(self):
tm.assert_index_equal(result, exp)
self.assertEqual(result.nlevels, 2)

def test_split_with_name(self):
# GH 12617

# should preserve name
s = Series(['a,b', 'c,d'], name='xxx')
res = s.str.split(',')
exp = Series([('a', 'b'), ('c', 'd')], name='xxx')
tm.assert_series_equal(res, exp)

res = s.str.split(',', expand=True)
exp = DataFrame([['a', 'b'], ['c', 'd']])
tm.assert_frame_equal(res, exp)

idx = Index(['a,b', 'c,d'], name='xxx')
res = idx.str.split(',')
exp = Index([['a', 'b'], ['c', 'd']], name='xxx')
self.assertTrue(res.nlevels, 1)
tm.assert_index_equal(res, exp)

res = idx.str.split(',', expand=True)
exp = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')])
self.assertTrue(res.nlevels, 2)
tm.assert_index_equal(res, exp)

def test_partition_series(self):
values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])

Expand Down Expand Up @@ -2059,6 +2083,31 @@ def test_partition_to_dataframe(self):
2: ['c', 'e', np.nan, 'h']})
tm.assert_frame_equal(result, exp)

def test_partition_with_name(self):
# GH 12617

s = Series(['a,b', 'c,d'], name='xxx')
res = s.str.partition(',')
exp = DataFrame({0: ['a', 'c'], 1: [',', ','], 2: ['b', 'd']})
tm.assert_frame_equal(res, exp)

# should preserve name
res = s.str.partition(',', expand=False)
exp = Series([('a', ',', 'b'), ('c', ',', 'd')], name='xxx')
tm.assert_series_equal(res, exp)

idx = Index(['a,b', 'c,d'], name='xxx')
res = idx.str.partition(',')
exp = MultiIndex.from_tuples([('a', ',', 'b'), ('c', ',', 'd')])
self.assertTrue(res.nlevels, 3)
tm.assert_index_equal(res, exp)

# should preserve name
res = idx.str.partition(',', expand=False)
exp = Index(np.array([('a', ',', 'b'), ('c', ',', 'd')]), name='xxx')
self.assertTrue(res.nlevels, 1)
tm.assert_index_equal(res, exp)

def test_pipe_failures(self):
# #2119
s = Series(['A|B|C'])
Expand Down