Skip to content

Commit 0a2b723

Browse files
sinhrks authored and jreback committed
BUG: .str methods with expand=True may raise ValueError if input has name
closes #12617 Author: sinhrks <[email protected]> Closes #12843 from sinhrks/str_expand_name and squashes the following commits: b5cdc02 [sinhrks] BUG: .str may raise ValueError if input has name
1 parent c03f545 commit 0a2b723

File tree

3 files changed

+59
-6
lines changed

3 files changed

+59
-6
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ Bug Fixes
227227

228228

229229

230+
- Bug in ``.str`` accessor methods, which may raise ``ValueError`` if the input has a ``name`` and the result is a ``DataFrame`` or ``MultiIndex`` (:issue:`12617`)
230231

231232

232233
- Bug in ``CategoricalIndex.get_loc`` where it returns a different result from a regular ``Index`` (:issue:`12531`)

pandas/core/strings.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -1329,12 +1329,15 @@ def cons_row(x):
13291329
if not isinstance(expand, bool):
13301330
raise ValueError("expand must be True or False")
13311331

1332-
if name is None:
1333-
name = getattr(result, 'name', None)
1334-
if name is None:
1335-
# do not use logical or, _orig may be a DataFrame
1336-
# which has "name" column
1337-
name = self._orig.name
1332+
if expand is False:
1333+
# if expand is False, result should have the same name
1334+
# as the original unless otherwise specified
1335+
if name is None:
1336+
name = getattr(result, 'name', None)
1337+
if name is None:
1338+
# do not use logical or, _orig may be a DataFrame
1339+
# which has "name" column
1340+
name = self._orig.name
13381341

13391342
# Wait until we are sure result is a Series or Index before
13401343
# checking attributes (GH 12180)

pandas/tests/test_strings.py

+49
Original file line numberDiff line numberDiff line change
@@ -1938,6 +1938,30 @@ def test_rsplit_to_multiindex_expand(self):
19381938
tm.assert_index_equal(result, exp)
19391939
self.assertEqual(result.nlevels, 2)
19401940

1941+
def test_split_with_name(self):
    """Check ``.str.split`` on a named Series/Index (GH 12617).

    With the default (non-expanding) call the result is expected to carry
    the input's name; with ``expand=True`` the expected DataFrame /
    MultiIndex is built without it.
    """
    # GH 12617

    # should preserve name
    s = Series(['a,b', 'c,d'], name='xxx')
    res = s.str.split(',')
    exp = Series([('a', 'b'), ('c', 'd')], name='xxx')
    tm.assert_series_equal(res, exp)

    # expanding to a DataFrame: expected frame has no reference to 'xxx'
    res = s.str.split(',', expand=True)
    exp = DataFrame([['a', 'b'], ['c', 'd']])
    tm.assert_frame_equal(res, exp)

    idx = Index(['a,b', 'c,d'], name='xxx')
    res = idx.str.split(',')
    exp = Index([['a', 'b'], ['c', 'd']], name='xxx')
    # assertEqual, not assertTrue: assertTrue(x, 1) would treat 1 as the
    # failure message and pass for any truthy nlevels.
    self.assertEqual(res.nlevels, 1)
    tm.assert_index_equal(res, exp)

    # expanding to a MultiIndex: one level per split piece
    res = idx.str.split(',', expand=True)
    exp = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')])
    self.assertEqual(res.nlevels, 2)
    tm.assert_index_equal(res, exp)
1964+
19411965
def test_partition_series(self):
19421966
values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])
19431967

@@ -2059,6 +2083,31 @@ def test_partition_to_dataframe(self):
20592083
2: ['c', 'e', np.nan, 'h']})
20602084
tm.assert_frame_equal(result, exp)
20612085

2086+
def test_partition_with_name(self):
    """Check ``.str.partition`` on a named Series/Index (GH 12617).

    The default call is compared against an unnamed DataFrame /
    MultiIndex; with ``expand=False`` the result is expected to keep the
    input's name.
    """
    # GH 12617

    s = Series(['a,b', 'c,d'], name='xxx')
    res = s.str.partition(',')
    exp = DataFrame({0: ['a', 'c'], 1: [',', ','], 2: ['b', 'd']})
    tm.assert_frame_equal(res, exp)

    # should preserve name
    res = s.str.partition(',', expand=False)
    exp = Series([('a', ',', 'b'), ('c', ',', 'd')], name='xxx')
    tm.assert_series_equal(res, exp)

    idx = Index(['a,b', 'c,d'], name='xxx')
    res = idx.str.partition(',')
    exp = MultiIndex.from_tuples([('a', ',', 'b'), ('c', ',', 'd')])
    # assertEqual, not assertTrue: assertTrue(x, 3) would treat 3 as the
    # failure message and pass for any truthy nlevels.
    self.assertEqual(res.nlevels, 3)
    tm.assert_index_equal(res, exp)

    # should preserve name
    res = idx.str.partition(',', expand=False)
    exp = Index(np.array([('a', ',', 'b'), ('c', ',', 'd')]), name='xxx')
    self.assertEqual(res.nlevels, 1)
    tm.assert_index_equal(res, exp)
2110+
20622111
def test_pipe_failures(self):
20632112
# #2119
20642113
s = Series(['A|B|C'])

0 commit comments

Comments
 (0)