Skip to content

Commit a7a0574

Browse files
toobaz authored and jreback committed
ENH: Support resetting index with tuple name
closes #16164 Author: Pietro Battiston <[email protected]> Closes #16165 from toobaz/reix_col_name and squashes the following commits: 9e1bdba [Pietro Battiston] REF: reorganize reinsertion code 3b0bb1f [Pietro Battiston] ENH: Handle tuples shorter than nlevels gracefully c958de7 [Pietro Battiston] TST: additional test for reset_index with tuple-named index level e12bca1 [Pietro Battiston] ENH: allow tuple index names to be interpreted as full column keys 6315d07 [Pietro Battiston] REF: Avoid duplication in reset_index() when reinserting index columns
1 parent b6f65eb commit a7a0574

File tree

3 files changed

+72
-41
lines changed

3 files changed

+72
-41
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ Other Enhancements
490490
- ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
491491
- Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename
492492
labels in the specified level of a MultiIndex (:issue:`4160`).
493+
- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if ``columns`` is a ``MultiIndex`` (:issue:`16164`)
493494
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
494495
- ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`)
495496
- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements

pandas/core/frame.py

+32-36
Original file line numberDiff line numberDiff line change
@@ -3019,44 +3019,40 @@ def _maybe_casted_values(index, labels=None):
30193019
if len(level) < len(self.index.levels):
30203020
new_index = self.index.droplevel(level)
30213021

3022-
if not drop:
3023-
names = self.index.names
3024-
zipped = lzip(self.index.levels, self.index.labels)
3025-
3026-
multi_col = isinstance(self.columns, MultiIndex)
3027-
for i, (lev, lab) in reversed(list(enumerate(zipped))):
3028-
col_name = names[i]
3029-
if col_name is None:
3030-
col_name = 'level_%d' % i
3031-
3032-
if multi_col:
3033-
if col_fill is None:
3034-
col_name = tuple([col_name] * self.columns.nlevels)
3035-
else:
3036-
name_lst = [col_fill] * self.columns.nlevels
3037-
lev_num = self.columns._get_level_number(col_level)
3038-
name_lst[lev_num] = col_name
3039-
col_name = tuple(name_lst)
3040-
3041-
# to ndarray and maybe infer different dtype
3042-
level_values = _maybe_casted_values(lev, lab)
3043-
if level is None or i in level:
3044-
new_obj.insert(0, col_name, level_values)
3045-
3046-
elif not drop:
3047-
name = self.index.name
3048-
if name is None or name == 'index':
3049-
name = 'index' if 'index' not in self else 'level_0'
3050-
if isinstance(self.columns, MultiIndex):
3051-
if col_fill is None:
3052-
name = tuple([name] * self.columns.nlevels)
3053-
else:
3054-
name_lst = [col_fill] * self.columns.nlevels
3022+
if not drop:
3023+
if isinstance(self.index, MultiIndex):
3024+
names = [n if n is not None else ('level_%d' % i)
3025+
for (i, n) in enumerate(self.index.names)]
3026+
to_insert = lzip(self.index.levels, self.index.labels)
3027+
else:
3028+
default = 'index' if 'index' not in self else 'level_0'
3029+
names = ([default] if self.index.name is None
3030+
else [self.index.name])
3031+
to_insert = ((self.index, None),)
3032+
3033+
multi_col = isinstance(self.columns, MultiIndex)
3034+
for i, (lev, lab) in reversed(list(enumerate(to_insert))):
3035+
name = names[i]
3036+
if multi_col:
3037+
col_name = (list(name) if isinstance(name, tuple)
3038+
else [name])
3039+
if col_fill is None:
3040+
if len(col_name) not in (1, self.columns.nlevels):
3041+
raise ValueError("col_fill=None is incompatible "
3042+
"with incomplete column name "
3043+
"{}".format(name))
3044+
col_fill = col_name[0]
3045+
30553046
lev_num = self.columns._get_level_number(col_level)
3056-
name_lst[lev_num] = name
3047+
name_lst = [col_fill] * lev_num + col_name
3048+
missing = self.columns.nlevels - len(name_lst)
3049+
name_lst += [col_fill] * missing
30573050
name = tuple(name_lst)
3058-
values = _maybe_casted_values(self.index)
3059-
new_obj.insert(0, name, values)
3051+
3052+
# to ndarray and maybe infer different dtype
3053+
level_values = _maybe_casted_values(lev, lab)
3054+
if level is None or i in level:
3055+
new_obj.insert(0, name, level_values)
30603056

30613057
new_obj.index = new_index
30623058
if not inplace:

pandas/tests/test_multilevel.py

100755 → 100644
+39-5
Original file line numberDiff line numberDiff line change
@@ -2242,16 +2242,50 @@ def test_reset_index_multiindex_columns(self):
22422242
levels = [['A', ''], ['B', 'b']]
22432243
df = pd.DataFrame([[0, 2], [1, 3]],
22442244
columns=pd.MultiIndex.from_tuples(levels))
2245-
expected = df.copy()
2246-
df.index.name = 'A'
2247-
result = df[['B']].reset_index()
2248-
tm.assert_frame_equal(result, expected)
2245+
result = df[['B']].rename_axis('A').reset_index()
2246+
tm.assert_frame_equal(result, df)
22492247

22502248
# gh-16120: already existing column
22512249
with tm.assert_raises_regex(ValueError,
22522250
("cannot insert \('A', ''\), "
22532251
"already exists")):
2254-
df.reset_index()
2252+
df.rename_axis('A').reset_index()
2253+
2254+
# gh-16164: multiindex (tuple) full key
2255+
result = df.set_index([('A', '')]).reset_index()
2256+
tm.assert_frame_equal(result, df)
2257+
2258+
# with additional (unnamed) index level
2259+
idx_col = pd.DataFrame([[0], [1]],
2260+
columns=pd.MultiIndex.from_tuples([('level_0',
2261+
'')]))
2262+
expected = pd.concat([idx_col, df[[('B', 'b'), ('A', '')]]], axis=1)
2263+
result = df.set_index([('B', 'b')], append=True).reset_index()
2264+
tm.assert_frame_equal(result, expected)
2265+
2266+
# with index name which is a too long tuple...
2267+
with tm.assert_raises_regex(ValueError,
2268+
("Item must have length equal to number "
2269+
"of levels.")):
2270+
df.rename_axis([('C', 'c', 'i')]).reset_index()
2271+
2272+
# or too short...
2273+
levels = [['A', 'a', ''], ['B', 'b', 'i']]
2274+
df2 = pd.DataFrame([[0, 2], [1, 3]],
2275+
columns=pd.MultiIndex.from_tuples(levels))
2276+
idx_col = pd.DataFrame([[0], [1]],
2277+
columns=pd.MultiIndex.from_tuples([('C',
2278+
'c',
2279+
'ii')]))
2280+
expected = pd.concat([idx_col, df2], axis=1)
2281+
result = df2.rename_axis([('C', 'c')]).reset_index(col_fill='ii')
2282+
tm.assert_frame_equal(result, expected)
2283+
2284+
# ... which is incompatible with col_fill=None
2285+
with tm.assert_raises_regex(ValueError,
2286+
("col_fill=None is incompatible with "
2287+
"incomplete column name \('C', 'c'\)")):
2288+
df2.rename_axis([('C', 'c')]).reset_index(col_fill=None)
22552289

22562290
def test_set_index_period(self):
22572291
# GH 6631

0 commit comments

Comments
 (0)