diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 86a598183517c..4c2dda48a7d10 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -490,6 +490,7 @@ Other Enhancements - ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename labels in the specified level of a MultiIndex (:issue:`4160`). +- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if this is a ``MultiIndex`` (:issue:`16164`) - ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs ` (:issue:`15136`) - ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 06bd8f8fc51bc..9a62259202653 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3019,44 +3019,40 @@ def _maybe_casted_values(index, labels=None): if len(level) < len(self.index.levels): new_index = self.index.droplevel(level) - if not drop: - names = self.index.names - zipped = lzip(self.index.levels, self.index.labels) - - multi_col = isinstance(self.columns, MultiIndex) - for i, (lev, lab) in reversed(list(enumerate(zipped))): - col_name = names[i] - if col_name is None: - col_name = 'level_%d' % i - - if multi_col: - if col_fill is None: - col_name = tuple([col_name] * self.columns.nlevels) - else: - name_lst = [col_fill] * self.columns.nlevels - lev_num = self.columns._get_level_number(col_level) - name_lst[lev_num] = col_name - col_name = tuple(name_lst) - - # to ndarray and maybe infer different dtype - level_values = _maybe_casted_values(lev, lab) - if level is None 
or i in level: - new_obj.insert(0, col_name, level_values) - - elif not drop: - name = self.index.name - if name is None or name == 'index': - name = 'index' if 'index' not in self else 'level_0' - if isinstance(self.columns, MultiIndex): - if col_fill is None: - name = tuple([name] * self.columns.nlevels) - else: - name_lst = [col_fill] * self.columns.nlevels + if not drop: + if isinstance(self.index, MultiIndex): + names = [n if n is not None else ('level_%d' % i) + for (i, n) in enumerate(self.index.names)] + to_insert = lzip(self.index.levels, self.index.labels) + else: + default = 'index' if 'index' not in self else 'level_0' + names = ([default] if self.index.name is None + else [self.index.name]) + to_insert = ((self.index, None),) + + multi_col = isinstance(self.columns, MultiIndex) + for i, (lev, lab) in reversed(list(enumerate(to_insert))): + name = names[i] + if multi_col: + col_name = (list(name) if isinstance(name, tuple) + else [name]) + if col_fill is None: + if len(col_name) not in (1, self.columns.nlevels): + raise ValueError("col_fill=None is incompatible " + "with incomplete column name " + "{}".format(name)) + col_fill = col_name[0] + lev_num = self.columns._get_level_number(col_level) - name_lst[lev_num] = name + name_lst = [col_fill] * lev_num + col_name + missing = self.columns.nlevels - len(name_lst) + name_lst += [col_fill] * missing name = tuple(name_lst) - values = _maybe_casted_values(self.index) - new_obj.insert(0, name, values) + + # to ndarray and maybe infer different dtype + level_values = _maybe_casted_values(lev, lab) + if level is None or i in level: + new_obj.insert(0, name, level_values) new_obj.index = new_index if not inplace: diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py old mode 100755 new mode 100644 index 1a4603978ce38..6d1d25c69f6b0 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2242,16 +2242,49 @@ def test_reset_index_multiindex_columns(self): levels = 
[['A', ''], ['B', 'b']] df = pd.DataFrame([[0, 2], [1, 3]], columns=pd.MultiIndex.from_tuples(levels)) - expected = df.copy() - df.index.name = 'A' - result = df[['B']].reset_index() - tm.assert_frame_equal(result, expected) + result = df[['B']].rename_axis('A').reset_index() + tm.assert_frame_equal(result, df) # gh-16120: already existing column with tm.assert_raises_regex(ValueError, ("cannot insert \('A', ''\), " "already exists")): - df.reset_index() + df.rename_axis('A').reset_index() + + # gh-16164: multiindex (tuple) full key + result = df.set_index([('A', '')]).reset_index() + tm.assert_frame_equal(result, df) + + # with additional (unnamed) index level + idx_col = pd.DataFrame([[0], [1]], + columns=pd.MultiIndex.from_tuples([('level_0', + '')])) + expected = pd.concat([idx_col, df[[('B', 'b'), ('A', '')]]], axis=1) + result = df.set_index([('B', 'b')], append=True).reset_index() + tm.assert_frame_equal(result, expected) + + # with index name which is a too long tuple... + with tm.assert_raises_regex(ValueError, + ("Item must have length equal to number " + "of levels.")): + df.rename_axis([('C', 'c', 'i')]).reset_index() + # or too short... + levels = [['A', 'a', ''], ['B', 'b', 'i']] + df2 = pd.DataFrame([[0, 2], [1, 3]], + columns=pd.MultiIndex.from_tuples(levels)) + idx_col = pd.DataFrame([[0], [1]], + columns=pd.MultiIndex.from_tuples([('C', + 'c', + 'ii')])) + expected = pd.concat([idx_col, df2], axis=1) + result = df2.rename_axis([('C', 'c')]).reset_index(col_fill='ii') + tm.assert_frame_equal(result, expected) + + # ... which is incompatible with col_fill=None + with tm.assert_raises_regex(ValueError, + ("col_fill=None is incompatible with " + "incomplete column name \('C', 'c'\)")): + df2.rename_axis([('C', 'c')]).reset_index(col_fill=None) def test_set_index_period(self): # GH 6631