diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8c02785647861..6cbee83247692 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -46,7 +46,7 @@ Other enhancements - Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`) - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) -- +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index caa08c67cbfab..be94f5ad2d735 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5684,6 +5684,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5696,6 +5697,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5708,6 +5710,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5720,6 +5723,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5731,6 +5735,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... 
@@ -5743,6 +5748,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5755,6 +5761,7 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", allow_duplicates: bool | lib.NoDefault = lib.no_default, + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5785,6 +5792,13 @@ def reset_index( .. versionadded:: 1.5.0 + names : int, str or 1-dimensional list, default None + Using the given name(s), rename the DataFrame column(s) which contain the + index data. If the DataFrame has a MultiIndex, this has to be a list + with length equal to the number of levels. + + .. versionadded:: 1.5.0 + Returns ------- DataFrame or None @@ -5855,6 +5869,16 @@ class name mammal lion 80.5 run monkey NaN jump + Using the `names` parameter, choose a name for the index column: + + >>> df.reset_index(names=['classes', 'names']) + classes names speed species + max type + 0 bird falcon 389.0 fly + 1 bird parrot 24.0 fly + 2 mammal lion 80.5 run + 3 mammal monkey NaN jump + If the index has multiple levels, we can reset a subset of them: >>> df.reset_index(level='class') @@ -5920,12 +5944,13 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] + + default = "index" if "index" not in self else "level_0" + names = self.index._get_default_index_names(names, default) + if isinstance(self.index, MultiIndex): - names = com.fill_missing_names(self.index.names) to_insert = zip(self.index.levels, self.index.codes) else: - default = "index" if "index" not in self else "level_0" - names = [default] if self.index.name is None else [self.index.name] to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c34e99298dd0e..2ded3c4926f6b 100644 --- 
a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1771,6 +1771,41 @@ def _validate_names( return new_names + def _get_default_index_names( + self, names: Hashable | Sequence[Hashable] | None = None, default=None + ) -> list[Hashable]: + """ + Get names of index. + + Parameters + ---------- + names : int, str or 1-dimensional list, default None + Index names to set. + default : str + Default name of index. + + Raises + ------ + ValueError + if names is not an int, str or list + """ + from pandas.core.indexes.multi import MultiIndex + + if names is not None: + if isinstance(names, str) or isinstance(names, int): + names = [names] + + if not isinstance(names, list) and names is not None: + raise ValueError("Index names must be str or 1-dimensional list") + + if not names: + if isinstance(self, MultiIndex): + names = com.fill_missing_names(self.names) + else: + names = [default] if self.name is None else [self.name] + + return names + def _get_names(self) -> FrozenList: return FrozenList((self.name,)) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 840d0c6e6bdf4..37431bc291b76 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -754,3 +754,42 @@ def test_reset_index_interval_columns_object_cast(): columns=Index(["Year", Interval(0, 1), Interval(1, 2)]), ) tm.assert_frame_equal(result, expected) + + +def test_reset_index_rename(float_frame): + # GH 6878 + result = float_frame.reset_index(names="new_name") + expected = Series(float_frame.index.values, name="new_name") + tm.assert_series_equal(result["new_name"], expected) + + result = float_frame.reset_index(names=123) + expected = Series(float_frame.index.values, name=123) + tm.assert_series_equal(result[123], expected) + + +def test_reset_index_rename_multiindex(float_frame): + # GH 6878 + stacked_df = float_frame.stack()[::2] + stacked_df = DataFrame({"foo": stacked_df, "bar": 
stacked_df}) + + names = ["first", "second"] + stacked_df.index.names = names + + result = stacked_df.reset_index() + expected = stacked_df.reset_index(names=["new_first", "new_second"]) + tm.assert_series_equal(result["first"], expected["new_first"], check_names=False) + tm.assert_series_equal(result["second"], expected["new_second"], check_names=False) + + +def test_errorreset_index_rename(float_frame): + # GH 6878 + stacked_df = float_frame.stack()[::2] + stacked_df = DataFrame({"first": stacked_df, "second": stacked_df}) + + with pytest.raises( + ValueError, match="Index names must be str or 1-dimensional list" + ): + stacked_df.reset_index(names={"first": "new_first", "second": "new_second"}) + + with pytest.raises(IndexError, match="list index out of range"): + stacked_df.reset_index(names=["new_first"])