diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index bb9c124bae68b..2239543bc841a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -106,6 +106,7 @@ Other enhancements - :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`) - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`) - Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`) +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which sets the name(s) of the column(s) that hold the index data (:issue:`6878`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f5090097f72d8..19085012d250b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5542,6 +5542,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5553,6 +5554,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5564,6 +5566,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5575,6 +5578,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5585,6 +5589,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... 
@@ -5596,6 +5601,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5607,6 +5613,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Hashable = "", + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5632,6 +5639,12 @@ def reset_index( col_fill : object, default '' If the columns have multiple levels, determines how the other levels are named. If None then the index name is repeated. + names : str, tuple or list, default None + Name(s) for the DataFrame column(s) which will hold the index data. + A regular Index requires a string; a MultiIndex requires a list or + tuple with length equal to the number of levels. + + .. versionadded:: 1.4.0 Returns ------- @@ -5669,6 +5682,16 @@ class max_speed 2 lion mammal 80.5 3 monkey mammal NaN + Using the `names` parameter, it is possible to choose a name for + the old index column: + + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + We can use the `drop` parameter to avoid the old index being added as a column: @@ -5767,14 +5790,10 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] if isinstance(self.index, MultiIndex): - names = [ - (n if n is not None else f"level_{i}") - for i, n in enumerate(self.index.names) - ] + names = self.index.get_default_index_names(names) to_insert = zip(self.index.levels, self.index.codes) else: - default = "index" if "index" not in self else "level_0" - names = [default] if self.index.name is None else [self.index.name] + names = self.index.get_default_index_names(self, names) to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 
645fab0d76a73..0ccef7a37f092 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1559,6 +1559,21 @@ def _validate_names( return new_names + def get_default_index_names( + self, df: DataFrame, names: str = None + ) -> Sequence[str]: + + if names is not None and not isinstance(names, str): + raise ValueError("Names must be a string") + + default = "index" if "index" not in df else "level_0" + if not names: + names = [default] if df.index.name is None else [df.index.name] + else: + names = [names] + + return names + def _get_names(self) -> FrozenList: return FrozenList((self.name,)) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0c158d47cfa3b..eecba6baa89e1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1391,6 +1391,26 @@ def format( # -------------------------------------------------------------------- # Names Methods + def get_default_index_names( + self, names: Sequence[Hashable] = None + ) -> Sequence[Hashable]: + + if names is not None and not isinstance(names, (tuple, list)): + raise ValueError("Names must be a tuple or list") + + if not names: + return [ + (n if n is not None else f"level_{i}") for i, n in enumerate(self.names) + ] + if len(names) != self.nlevels: + raise ValueError( + f"The number of provided names " + f"({len(names)}) does not match the number of " + f"MultiIndex levels ({self.nlevels})" + ) + + return names + def _get_names(self) -> FrozenList: return FrozenList(self._names) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 76d259707787d..009aa672a4236 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -183,6 +183,37 @@ def test_reset_index_name(self): assert return_value is None assert df.index.name is None + def test_reset_index_rename(self, float_frame): + # GH 6878 + rdf = float_frame.reset_index(names="new_name") + 
exp = Series(float_frame.index.values, name="new_name") + tm.assert_series_equal(rdf["new_name"], exp) + + with pytest.raises(ValueError, match="Names must be a string"): + float_frame.reset_index(names=1) + + def test_reset_index_rename_multiindex(self, float_frame): + # GH 6878 + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + deleveled2 = stacked.reset_index(names=["new_first", "new_second"]) + tm.assert_series_equal( + deleveled["first"], deleveled2["new_first"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["new_second"], check_names=False + ) + + with pytest.raises(ValueError, match=r".* number of provided names .*"): + stacked.reset_index(names=["new_first"]) + + with pytest.raises(ValueError, match="Names must be a tuple or list"): + stacked.reset_index(names={"first": "new_first", "second": "new_second"}) + def test_reset_index_level(self): df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"])