From ef2a6a523a54b59505753543b6ce4184eca25106 Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Fri, 2 Jul 2021 12:13:48 +0200 Subject: [PATCH 01/10] Add argument to rename index when resetting it. --- pandas/core/frame.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f5090097f72d8..61fa34d9bc83a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5542,6 +5542,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5553,6 +5554,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5564,6 +5566,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5575,6 +5578,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5585,6 +5589,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5596,6 +5601,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5607,7 +5613,8 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Hashable = "", - ) -> DataFrame | None: + names: Hashable | Sequence[Hashable] = None, + ) -> Optional[DataFrame]: """ Reset the index, or a level of it. @@ -5767,14 +5774,20 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] if isinstance(self.index, MultiIndex): - names = [ - (n if n is not None else f"level_{i}") - for i, n in enumerate(self.index.names) - ] + if not names: + names = [ + (n if n is not None else f"level_{i}") + for i, n in enumerate(self.index.names) + ] + else: + names = names to_insert = zip(self.index.levels, self.index.codes) else: default = "index" if "index" not in self else "level_0" - names = [default] if self.index.name is None else [self.index.name] + if not names: + names = [default] if self.index.name is None else [self.index.name] + else: + names = [names] to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) From 69b3073f44f072441592407c9fc44ecbc4aa3918 Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Fri, 2 Jul 2021 16:24:14 +0200 Subject: [PATCH 02/10] Add tests. --- pandas/core/frame.py | 2 +- .../tests/frame/methods/test_reset_index.py | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 61fa34d9bc83a..bcc115ca53afd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5614,7 +5614,7 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", names: Hashable | Sequence[Hashable] = None, - ) -> Optional[DataFrame]: + ) -> DataFrame | None: """ Reset the index, or a level of it. diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 76d259707787d..b78a64cd8b43f 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -183,6 +183,27 @@ def test_reset_index_name(self): assert return_value is None assert df.index.name is None + def test_reset_index_rename(self, float_frame): + # index + rdf = float_frame.reset_index(names="new_name") + exp = Series(float_frame.index.values, name="new_name") + tm.assert_series_equal(rdf["new_name"], exp) + + # multiindex + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + deleveled2 = stacked.reset_index(names=["new_first", "new_second"]) + tm.assert_series_equal( + deleveled["first"], deleveled2["new_first"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["new_second"], check_names=False + ) + def test_reset_index_level(self): df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) From 141da420d6d0993188bd19a525427c13ec233c38 Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Mon, 5 Jul 2021 18:32:20 +0200 Subject: [PATCH 03/10] Split test function --- pandas/core/frame.py | 2 -- pandas/tests/frame/methods/test_reset_index.py | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bcc115ca53afd..737b0c017ea17 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5779,8 +5779,6 @@ class max type (n if n is not None else f"level_{i}") for i, n in enumerate(self.index.names) ] - else: - names = names to_insert = zip(self.index.levels, self.index.codes) else: default = "index" if "index" not in self else "level_0" diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index b78a64cd8b43f..e8ffc2e90b980 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -184,12 +184,13 @@ def test_reset_index_name(self): assert df.index.name is None def test_reset_index_rename(self, float_frame): - # index + rdf = float_frame.reset_index(names="new_name") exp = Series(float_frame.index.values, name="new_name") tm.assert_series_equal(rdf["new_name"], exp) - # multiindex + def test_reset_index_rename_multiindex(self, float_frame): + stacked = float_frame.stack()[::2] stacked = DataFrame({"foo": stacked, "bar": stacked}) From 0673a5f92b3eb7253d1d351360f6993783060bb3 Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Tue, 6 Jul 2021 10:06:22 +0200 Subject: [PATCH 04/10] Add issue number. --- pandas/tests/frame/methods/test_reset_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index e8ffc2e90b980..82ba8440b82cf 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -184,13 +184,13 @@ def test_reset_index_name(self): assert df.index.name is None def test_reset_index_rename(self, float_frame): - + # GH 6878 rdf = float_frame.reset_index(names="new_name") exp = Series(float_frame.index.values, name="new_name") tm.assert_series_equal(rdf["new_name"], exp) def test_reset_index_rename_multiindex(self, float_frame): - + # GH 6878 stacked = float_frame.stack()[::2] stacked = DataFrame({"foo": stacked, "bar": stacked}) From ab5723223bac8fa844a7a4f17d26adde9187b90e Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Tue, 6 Jul 2021 13:23:34 +0200 Subject: [PATCH 05/10] Edit whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index bb9c124bae68b..2239543bc841a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -106,6 +106,7 @@ Other enhancements - :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`) - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`) - Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`) +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) .. --------------------------------------------------------------------------- From ecf88fff975ae3302b34fad6eecd3c3c3e1d9ebe Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Thu, 8 Jul 2021 13:18:44 +0200 Subject: [PATCH 06/10] Add an example in the doc-string. --- pandas/core/frame.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 737b0c017ea17..3fb443e74df43 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5676,6 +5676,16 @@ class max_speed 2 lion mammal 80.5 3 monkey mammal NaN + Using the `names` parameter, it is possible to choose a name for + the old index column: + + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + We can use the `drop` parameter to avoid the old index being added as a column: From 548a30f0aef1e38ae2e99d954d59c4085c45ad4f Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Fri, 9 Jul 2021 21:33:34 +0200 Subject: [PATCH 07/10] Edit doc-string, test failure cases. --- pandas/core/frame.py | 22 +++++++++++++++++++ .../tests/frame/methods/test_reset_index.py | 9 ++++++++ 2 files changed, 31 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3fb443e74df43..05c6964415a93 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5639,6 +5639,12 @@ def reset_index( col_fill : object, default '' If the columns have multiple levels, determines how the other levels are named. If None then the index name is repeated. + names : str, tuple or list, default None + Using the given string, rename the DataFrame column which contains the index data. + If the DataFrame has a MultiIndex, this has to be a list or tuple with length + equal to the number of levels. + + .. versionadded:: 1.4.0 Returns ------- @@ -5781,6 +5787,14 @@ class max type if len(level) < self.index.nlevels: new_index = self.index.droplevel(level) + if names is not None: + if isinstance(self.index, MultiIndex): + if not isinstance(names, (tuple, list)): + raise ValueError("Names must be a tuple or list") + else: + if not isinstance(names, str): + raise ValueError("Names must be a string") + if not drop: to_insert: Iterable[tuple[Any, Any | None]] if isinstance(self.index, MultiIndex): @@ -5789,6 +5803,14 @@ class max type (n if n is not None else f"level_{i}") for i, n in enumerate(self.index.names) ] + else: + + if len(names) != self.index.nlevels: + raise ValueError( + f"The number of provided names " + f"({len(names)}) does not match the number of" + f" MultiIndex levels ({self.index.nlevels})" + ) to_insert = zip(self.index.levels, self.index.codes) else: default = "index" if "index" not in self else "level_0" diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 82ba8440b82cf..009aa672a4236 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -189,6 +189,9 @@ def test_reset_index_rename(self, float_frame): exp = Series(float_frame.index.values, name="new_name") tm.assert_series_equal(rdf["new_name"], exp) + with pytest.raises(ValueError, match="Names must be a string"): + float_frame.reset_index(names=1) + def test_reset_index_rename_multiindex(self, float_frame): # GH 6878 stacked = float_frame.stack()[::2] @@ -205,6 +208,12 @@ def test_reset_index_rename_multiindex(self, float_frame): deleveled["second"], deleveled2["new_second"], check_names=False ) + with pytest.raises(ValueError, match=r".* number of provided names .*"): + stacked.reset_index(names=["new_first"]) + + with pytest.raises(ValueError, match="Names must be a tuple or list"): + stacked.reset_index(names={"first": "new_first", "second": "new_second"}) + def test_reset_index_level(self): df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) From 96f52fbe0f10bb9a69add42833ce659faf7c6c41 Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Mon, 23 Aug 2021 19:28:28 +0200 Subject: [PATCH 08/10] Add get_default_index_names method to Index and MultiIndex --- pandas/core/frame.py | 34 +++++----------------------------- pandas/core/indexes/base.py | 12 ++++++++++++ pandas/core/indexes/multi.py | 19 +++++++++++++++++++ 3 files changed, 36 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 05c6964415a93..19085012d250b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5640,9 +5640,9 @@ def reset_index( If the columns have multiple levels, determines how the other levels are named. If None then the index name is repeated. names : str, tuple or list, default None - Using the given string, rename the DataFrame column which contains the index data. - If the DataFrame has a MultiIndex, this has to be a list or tuple with length - equal to the number of levels. + Using the given string, rename the DataFrame column which contains the + index data. If the DataFrame has a MultiIndex, this has to be a list or + tuple with length equal to the number of levels. .. versionadded:: 1.4.0 @@ -5787,37 +5787,13 @@ class max type if len(level) < self.index.nlevels: new_index = self.index.droplevel(level) - if names is not None: - if isinstance(self.index, MultiIndex): - if not isinstance(names, (tuple, list)): - raise ValueError("Names must be a tuple or list") - else: - if not isinstance(names, str): - raise ValueError("Names must be a string") - if not drop: to_insert: Iterable[tuple[Any, Any | None]] if isinstance(self.index, MultiIndex): - if not names: - names = [ - (n if n is not None else f"level_{i}") - for i, n in enumerate(self.index.names) - ] - else: - - if len(names) != self.index.nlevels: - raise ValueError( - f"The number of provided names " - f"({len(names)}) does not match the number of" - f" MultiIndex levels ({self.index.nlevels})" - ) + names = self.index.get_default_index_names(names) to_insert = zip(self.index.levels, self.index.codes) else: - default = "index" if "index" not in self else "level_0" - if not names: - names = [default] if self.index.name is None else [self.index.name] - else: - names = [names] + names = self.index.get_default_index_names(self, names) to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 645fab0d76a73..4860908c2eada 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1559,6 +1559,18 @@ def _validate_names( return new_names + def get_default_index_names(self, df: DataFrame, names: str = None): + + if names is not None and not isinstance(names, str): + raise ValueError("Names must be a string") + + default = "index" if "index" not in self else "level_0" + if not names: + names = [default] if df.index.name is None else [df.index.name] + else: + names = [names] + return names + def _get_names(self) -> FrozenList: return FrozenList((self.name,)) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0c158d47cfa3b..83ac2854ae28d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1391,6 +1391,25 @@ def format( # -------------------------------------------------------------------- # Names Methods + def get_default_index_names(self, names=None): + + if names is not None and not isinstance(names, (tuple, list)): + raise ValueError("Names must be a tuple or list") + + if not names: + names = [ + (n if n is not None else f"level_{i}") for i, n in enumerate(self.names) + ] + else: + if len(names) != self.nlevels: + raise ValueError( + f"The number of provided names " + f"({len(names)}) does not match the number of " + f"MultiIndex levels ({self.nlevels})" + ) + + return names + def _get_names(self) -> FrozenList: return FrozenList(self._names) From 3c006c0dc2c79b1a0b37b42ecb9ed0e1e4a1d5be Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Wed, 8 Sep 2021 12:02:55 +0200 Subject: [PATCH 09/10] Simplify logic --- pandas/core/indexes/multi.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 83ac2854ae28d..d93402c7bfc35 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1397,16 +1397,15 @@ def get_default_index_names(self, names=None): raise ValueError("Names must be a tuple or list") if not names: - names = [ + return [ (n if n is not None else f"level_{i}") for i, n in enumerate(self.names) ] - else: - if len(names) != self.nlevels: - raise ValueError( - f"The number of provided names " - f"({len(names)}) does not match the number of " - f"MultiIndex levels ({self.nlevels})" - ) + if len(names) != self.nlevels: + raise ValueError( + f"The number of provided names " + f"({len(names)}) does not match the number of " + f"MultiIndex levels ({self.nlevels})" + ) return names From 11e46879ed6bf9241e57d08b5d4d14f25452075a Mon Sep 17 00:00:00 2001 From: Giacomo Caria Date: Thu, 9 Sep 2021 12:57:22 +0200 Subject: [PATCH 10/10] Add typing --- pandas/core/indexes/base.py | 7 +++++-- pandas/core/indexes/multi.py | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4860908c2eada..0ccef7a37f092 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1559,16 +1559,19 @@ def _validate_names( return new_names - def get_default_index_names(self, df: DataFrame, names: str = None): + def get_default_index_names( + self, df: DataFrame, names: str = None + ) -> Sequence[str]: if names is not None and not isinstance(names, str): raise ValueError("Names must be a string") - default = "index" if "index" not in self else "level_0" + default = "index" if "index" not in df else "level_0" if not names: names = [default] if df.index.name is None else [df.index.name] else: names = [names] + return names def _get_names(self) -> FrozenList: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d93402c7bfc35..eecba6baa89e1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1391,7 +1391,9 @@ def format( # -------------------------------------------------------------------- # Names Methods - def get_default_index_names(self, names=None): + def get_default_index_names( + self, names: Sequence[Hashable] = None + ) -> Sequence[Hashable]: if names is not None and not isinstance(names, (tuple, list)): raise ValueError("Names must be a tuple or list")