Skip to content

Support resetting index with tuple name #16165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ Other Enhancements
- ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
- Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename
labels in the specified level of a MultiIndex (:issue:`4160`).
- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if this is a ``MultiIndex`` (:issue:`16164`)
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
- ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`)
- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements
Expand Down
68 changes: 32 additions & 36 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3019,44 +3019,40 @@ def _maybe_casted_values(index, labels=None):
if len(level) < len(self.index.levels):
new_index = self.index.droplevel(level)

if not drop:
names = self.index.names
zipped = lzip(self.index.levels, self.index.labels)

multi_col = isinstance(self.columns, MultiIndex)
for i, (lev, lab) in reversed(list(enumerate(zipped))):
col_name = names[i]
if col_name is None:
col_name = 'level_%d' % i

if multi_col:
if col_fill is None:
col_name = tuple([col_name] * self.columns.nlevels)
else:
name_lst = [col_fill] * self.columns.nlevels
lev_num = self.columns._get_level_number(col_level)
name_lst[lev_num] = col_name
col_name = tuple(name_lst)

# to ndarray and maybe infer different dtype
level_values = _maybe_casted_values(lev, lab)
if level is None or i in level:
new_obj.insert(0, col_name, level_values)

elif not drop:
name = self.index.name
if name is None or name == 'index':
name = 'index' if 'index' not in self else 'level_0'
if isinstance(self.columns, MultiIndex):
if col_fill is None:
name = tuple([name] * self.columns.nlevels)
else:
name_lst = [col_fill] * self.columns.nlevels
if not drop:
if isinstance(self.index, MultiIndex):
names = [n if n is not None else ('level_%d' % i)
for (i, n) in enumerate(self.index.names)]
to_insert = lzip(self.index.levels, self.index.labels)
else:
default = 'index' if 'index' not in self else 'level_0'
names = ([default] if self.index.name is None
else [self.index.name])
to_insert = ((self.index, None),)

multi_col = isinstance(self.columns, MultiIndex)
for i, (lev, lab) in reversed(list(enumerate(to_insert))):
name = names[i]
if multi_col:
col_name = (list(name) if isinstance(name, tuple)
else [name])
if col_fill is None:
if len(col_name) not in (1, self.columns.nlevels):
raise ValueError("col_fill=None is incompatible "
"with incomplete column name "
"{}".format(name))
col_fill = col_name[0]

lev_num = self.columns._get_level_number(col_level)
name_lst[lev_num] = name
name_lst = [col_fill] * lev_num + col_name
missing = self.columns.nlevels - len(name_lst)
name_lst += [col_fill] * missing
name = tuple(name_lst)
values = _maybe_casted_values(self.index)
new_obj.insert(0, name, values)

# to ndarray and maybe infer different dtype
level_values = _maybe_casted_values(lev, lab)
if level is None or i in level:
new_obj.insert(0, name, level_values)

new_obj.index = new_index
if not inplace:
Expand Down
43 changes: 38 additions & 5 deletions pandas/tests/test_multilevel.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2242,16 +2242,49 @@ def test_reset_index_multiindex_columns(self):
levels = [['A', ''], ['B', 'b']]
df = pd.DataFrame([[0, 2], [1, 3]],
columns=pd.MultiIndex.from_tuples(levels))
expected = df.copy()
df.index.name = 'A'
result = df[['B']].reset_index()
tm.assert_frame_equal(result, expected)
result = df[['B']].rename_axis('A').reset_index()
tm.assert_frame_equal(result, df)

# gh-16120: already existing column
with tm.assert_raises_regex(ValueError,
("cannot insert \('A', ''\), "
"already exists")):
df.reset_index()
df.rename_axis('A').reset_index()

# gh-16164: multiindex (tuple) full key
result = df.set_index([('A', '')]).reset_index()
tm.assert_frame_equal(result, df)

# with additional (unnamed) index level
idx_col = pd.DataFrame([[0], [1]],
columns=pd.MultiIndex.from_tuples([('level_0',
'')]))
expected = pd.concat([idx_col, df[[('B', 'b'), ('A', '')]]], axis=1)
result = df.set_index([('B', 'b')], append=True).reset_index()
tm.assert_frame_equal(result, expected)

# with index name which is a too long tuple...
with tm.assert_raises_regex(ValueError,
("Item must have length equal to number "
"of levels.")):
df.rename_axis([('C', 'c', 'i')]).reset_index()
# or too short...
levels = [['A', 'a', ''], ['B', 'b', 'i']]
df2 = pd.DataFrame([[0, 2], [1, 3]],
columns=pd.MultiIndex.from_tuples(levels))
idx_col = pd.DataFrame([[0], [1]],
columns=pd.MultiIndex.from_tuples([('C',
'c',
'ii')]))
expected = pd.concat([idx_col, df2], axis=1)
result = df2.rename_axis([('C', 'c')]).reset_index(col_fill='ii')
tm.assert_frame_equal(result, expected)

# ... which is incompatible with col_fill=None
with tm.assert_raises_regex(ValueError,
("col_fill=None is incompatible with "
"incomplete column name \('C', 'c'\)")):
df2.rename_axis([('C', 'c')]).reset_index(col_fill=None)

def test_set_index_period(self):
# GH 6631
Expand Down