From 88cd26fb82a3d3da027397ba702874da349ae5b0 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sat, 4 Dec 2021 18:15:21 +0000 Subject: [PATCH 01/31] reset_index to handle duplicate column labels --- pandas/core/frame.py | 7 ++++++- pandas/tests/frame/methods/test_reset_index.py | 15 +-------------- pandas/tests/io/pytables/test_put.py | 2 +- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9e6faa7037dae..d32328486b8a2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5838,7 +5838,12 @@ class max type level_values, lab, allow_fill=True, fill_value=lev._na_value ) - new_obj.insert(0, name, level_values) + new_obj.insert( + 0, + name, + level_values, + allow_duplicates=self.flags.allows_duplicate_labels, + ) new_obj.index = new_index if not inplace: diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 43af48cf4a654..1479c880155a8 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -328,21 +328,13 @@ def test_reset_index_multiindex_nan(self): ) def test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 - warn = None - if isinstance(name, Timestamp) and name.tz is not None: - # _deprecate_mismatched_indexing - warn = FutureWarning - df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), index=["A", "B"], ) df.index.name = name - - with tm.assert_produces_warning(warn): - result = df.reset_index() - + result = df.reset_index() item = name if name is not None else "index" columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) if isinstance(item, str) and item == "2012-12-31": @@ -374,11 +366,6 @@ def test_reset_index_multiindex_columns(self): result = df[["B"]].rename_axis("A").reset_index() tm.assert_frame_equal(result, df) - # GH#16120: already existing column - msg = r"cannot insert \('A', ''\), already exists" - with pytest.raises(ValueError, match=msg): - df.rename_axis("A").reset_index() - # GH#16164: multiindex (tuple) full key result = df.set_index([("A", "")]).reset_index() tm.assert_frame_equal(result, df) diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 41addc5023436..5b73aae9a3541 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -351,7 +351,7 @@ def make_index(names=None): columns=["a", "b"], index=make_index(["date", "a", "t"]), ) - msg = "duplicate names/columns in the multi-index when storing as a table" + msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): store.append("df", df) From a23dbdc72f96bf71c74c5c426d3d81e55853bcb3 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sun, 5 Dec 2021 14:02:03 +0000 Subject: [PATCH 02/31] Add tests --- .../tests/frame/methods/test_reset_index.py | 6 +++++ .../tests/series/methods/test_reset_index.py | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 1479c880155a8..0d41849cf3c03 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -366,6 +366,12 @@ def test_reset_index_multiindex_columns(self): result = df[["B"]].rename_axis("A").reset_index() tm.assert_frame_equal(result, df) + # GH#44755 reset_index with duplicate column labels + result = df.rename_axis("A").reset_index() + levels = [["A", ""], ["A", ""], ["B", "b"]] + expected = DataFrame([[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels)) + tm.assert_frame_equal(result, expected) + # GH#16164: multiindex (tuple) full key result = df.set_index([("A", "")]).reset_index() tm.assert_frame_equal(result, df) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index f38491508cc23..b9d8bc7aa4208 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -186,3 +186,29 @@ def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype): {"level_0": np.int64, "level_1": np.float64, "level_2": dtype, 0: object} ) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "test, expected_names", + [ + ("repeat", ["a", None, "d", "b", "b", "e"]), + ("level", ["a", None, "d", "b", "c", "level_1"]), + ], +) +def test_column_name_clashes(test, expected_names): + df = pd.DataFrame( + {"a": [1, 2], "b": [3, 4], "c": [5, 6], "d": [7, 8], "e": [9, 10]} + ) + if test == "repeat": + df.columns = list("abbde") + else: + df.columns = list("abcd") + ["level_1"] + result = df.groupby(["a", [0, 1], "d"]).value_counts() + expected = pd.Series( + data=(1, 1), + index=pd.MultiIndex.from_tuples( + [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)], + names=expected_names, + ), + ) + tm.assert_series_equal(result, expected) From b1011772450f7b051392beb8b932fc85245a43e1 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sun, 5 Dec 2021 14:49:46 +0000 Subject: [PATCH 03/31] Add tests --- .../tests/frame/methods/test_reset_index.py | 4 ++- .../tests/series/methods/test_reset_index.py | 29 ++++++------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 0d41849cf3c03..f556d7f8a8ff2 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -369,7 +369,9 @@ def test_reset_index_multiindex_columns(self): # GH#44755 reset_index with duplicate column labels result = df.rename_axis("A").reset_index() levels = [["A", ""], ["A", ""], ["B", "b"]] - expected = DataFrame([[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels)) + expected = DataFrame( + [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels) + ) tm.assert_frame_equal(result, expected) # GH#16164: multiindex (tuple) full key diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index b9d8bc7aa4208..3bb44b798edff 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -189,26 +189,15 @@ def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype): @pytest.mark.parametrize( - "test, expected_names", + "names, expected_names", [ - ("repeat", ["a", None, "d", "b", "b", "e"]), - ("level", ["a", None, "d", "b", "c", "level_1"]), + (["A", "A"], ["A", "A"]), + (["level_1", None], ["level_1", "level_1"]), ], ) -def test_column_name_clashes(test, expected_names): - df = pd.DataFrame( - {"a": [1, 2], "b": [3, 4], "c": [5, 6], "d": [7, 8], "e": [9, 10]} - ) - if test == "repeat": - df.columns = list("abbde") - else: - df.columns = list("abcd") + ["level_1"] - result = df.groupby(["a", [0, 1], "d"]).value_counts() - expected = pd.Series( - data=(1, 1), - index=pd.MultiIndex.from_tuples( - [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)], - names=expected_names, - ), - ) - tm.assert_series_equal(result, expected) +def test_column_name_duplicates(names, expected_names): + # GH#44755 reset_index with duplicate column labels + s = Series([1], index=MultiIndex.from_arrays([[1], [1]], names=names)) + result = s.reset_index() + expected = DataFrame([[1, 1, 1]], columns=expected_names + [0]) + tm.assert_frame_equal(result, expected) From 39d9f755a2486290761b2ff3abe5cecb5d37a639 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sun, 5 Dec 2021 14:55:06 +0000 Subject: [PATCH 04/31] Update v1.4.0.rst --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4a4e7dd6d15d7..18880902c737b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -686,6 +686,7 @@ Indexing - Bug in indexing on columns with ``loc`` or ``iloc`` using a slice with a negative step with ``ExtensionDtype`` columns incorrectly raising (:issue:`44551`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`) +- Bug in :meth:`Series.reset_index` on a MultiIndex with duplicate levels raises a ValueError (:issue:`44410`) - Missing From e5ab5f7bc2c45b1a12f9de3085018ca87c5f5a0a Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 6 Dec 2021 13:31:17 +0000 Subject: [PATCH 05/31] Implement allow_duplicates parameter --- pandas/core/frame.py | 12 +++++++++++- pandas/core/series.py | 4 ++-- pandas/tests/frame/methods/test_reset_index.py | 17 +++++++++++++++-- pandas/tests/io/pytables/test_put.py | 2 +- pandas/tests/series/methods/test_reset_index.py | 13 +++++++++---- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 52e10326cb4d0..41d51c6f4e80b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5571,6 +5571,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., + allow_duplicates: bool | None = ..., ) -> DataFrame: ... @@ -5582,6 +5583,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + allow_duplicates: bool | None = ..., ) -> None: ... @@ -5593,6 +5595,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + allow_duplicates: bool | None = ..., ) -> None: ... @@ -5604,6 +5607,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + allow_duplicates: bool | None = ..., ) -> None: ... @@ -5614,6 +5618,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., + allow_duplicates: bool | None = ..., ) -> None: ... @@ -5625,6 +5630,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., + allow_duplicates: bool | None = ..., ) -> DataFrame | None: ... @@ -5636,6 +5642,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Hashable = "", + allow_duplicates: bool | None = False, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5838,11 +5845,14 @@ class max type level_values, lab, allow_fill=True, fill_value=lev._na_value ) + if allow_duplicates is None: + allow_duplicates = self.flags.allows_duplicate_labels + new_obj.insert( 0, name, level_values, - allow_duplicates=self.flags.allows_duplicate_labels, + allow_duplicates=allow_duplicates, ) new_obj.index = new_index diff --git a/pandas/core/series.py b/pandas/core/series.py index ab6550a48bc31..4b912f9803867 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1350,7 +1350,7 @@ def repeat(self, repeats, axis=None) -> Series: ) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) - def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False): + def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False, allow_duplicates: bool = False): """ Generate a new DataFrame or Series with the index reset. @@ -1488,7 +1488,7 @@ def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False name = self.name df = self.to_frame(name) - return df.reset_index(level=level, drop=drop) + return df.reset_index(level=level, drop=drop, allow_duplicates=allow_duplicates) # ---------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index f556d7f8a8ff2..4a37554766a20 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -328,13 +328,21 @@ def test_reset_index_multiindex_nan(self): ) def test_reset_index_with_datetimeindex_cols(self, name): # GH#5818 + warn = None + if isinstance(name, Timestamp) and name.tz is not None: + # _deprecate_mismatched_indexing + warn = FutureWarning + df = DataFrame( [[1, 2], [3, 4]], columns=date_range("1/1/2013", "1/2/2013"), index=["A", "B"], ) df.index.name = name - result = df.reset_index() + + with tm.assert_produces_warning(warn): + result = df.reset_index() + item = name if name is not None else "index" columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)]) if isinstance(item, str) and item == "2012-12-31": @@ -366,8 +374,13 @@ def test_reset_index_multiindex_columns(self): result = df[["B"]].rename_axis("A").reset_index() tm.assert_frame_equal(result, df) + # GH#16120: already existing column + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.rename_axis("A").reset_index() + # GH#44755 reset_index with duplicate column labels - result = df.rename_axis("A").reset_index() + result = df.rename_axis("A").reset_index(allow_duplicates=True) levels = [["A", ""], ["A", ""], ["B", "b"]] expected = DataFrame( [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels) diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 5b73aae9a3541..41addc5023436 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -351,7 +351,7 @@ def make_index(names=None): columns=["a", "b"], index=make_index(["date", "a", "t"]), ) - msg = "cannot reindex on an axis with duplicate labels" + msg = "duplicate names/columns in the multi-index when storing as a table" with pytest.raises(ValueError, match=msg): store.append("df", df) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 3bb44b798edff..27d005e6f0d16 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -195,9 +195,14 @@ def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype): (["level_1", None], ["level_1", "level_1"]), ], ) -def test_column_name_duplicates(names, expected_names): +@pytest.mark.parametrize("allow_duplicates", [False, True]) +def test_column_name_duplicates(names, expected_names, allow_duplicates): # GH#44755 reset_index with duplicate column labels s = Series([1], index=MultiIndex.from_arrays([[1], [1]], names=names)) - result = s.reset_index() - expected = DataFrame([[1, 1, 1]], columns=expected_names + [0]) - tm.assert_frame_equal(result, expected) + if allow_duplicates: + result = s.reset_index(allow_duplicates=allow_duplicates) + expected = DataFrame([[1, 1, 1]], columns=expected_names + [0]) + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="cannot insert"): + s.reset_index(allow_duplicates=allow_duplicates) \ No newline at end of file From b4828e6ffb411d587c34f985d62b2bc46c2850c1 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 6 Dec 2021 13:48:49 +0000 Subject: [PATCH 06/31] Formatting --- pandas/core/series.py | 13 +++++++++++-- pandas/tests/frame/methods/test_reset_index.py | 4 ++-- pandas/tests/series/methods/test_reset_index.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4b912f9803867..c1dbfa6f8b1f8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1350,7 +1350,14 @@ def repeat(self, repeats, axis=None) -> Series: ) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) - def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False, allow_duplicates: bool = False): + def reset_index( + self, + level=None, + drop=False, + name=lib.no_default, + inplace=False, + allow_duplicates: bool = False, + ): """ Generate a new DataFrame or Series with the index reset. @@ -1488,7 +1495,9 @@ def reset_index(self, level=None, drop=False, name=lib.no_default, inplace=False name = self.name df = self.to_frame(name) - return df.reset_index(level=level, drop=drop, allow_duplicates=allow_duplicates) + return df.reset_index( + level=level, drop=drop, allow_duplicates=allow_duplicates + ) # ---------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 4a37554766a20..02cb0e2ab493b 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -375,9 +375,9 @@ def test_reset_index_multiindex_columns(self): tm.assert_frame_equal(result, df) # GH#16120: already existing column - msg = r"cannot insert \('A', ''\), already exists" + msg = r"cannot insert \('A', ''\), already exists" with pytest.raises(ValueError, match=msg): - df.rename_axis("A").reset_index() + df.rename_axis("A").reset_index() # GH#44755 reset_index with duplicate column labels result = df.rename_axis("A").reset_index(allow_duplicates=True) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 27d005e6f0d16..8d8f34f26eb7a 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -205,4 +205,4 @@ def test_column_name_duplicates(names, expected_names, allow_duplicates): tm.assert_frame_equal(result, expected) else: with pytest.raises(ValueError, match="cannot insert"): - s.reset_index(allow_duplicates=allow_duplicates) \ No newline at end of file + s.reset_index(allow_duplicates=allow_duplicates) From d9d60eeb1a4e58ecbb07fe6f0de89b7bbbb358eb Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 6 Dec 2021 14:10:32 +0000 Subject: [PATCH 07/31] Add docstrings --- pandas/core/frame.py | 3 +++ pandas/core/series.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 41d51c6f4e80b..68dda2f422dd7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5668,6 +5668,9 @@ def reset_index( col_fill : object, default '' If the columns have multiple levels, determines how the other levels are named. If None then the index name is repeated. + allow_duplicates : bool or None, default False + Allow duplicate column labels to be created. + If None take value from self.flags.allows_duplicate_labels. Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index c1dbfa6f8b1f8..9bedee548a2a5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1379,6 +1379,8 @@ def reset_index( when `drop` is True. inplace : bool, default False Modify the Series in place (do not create a new object). + allow_duplicates : bool, default False + Allow duplicate column labels to be created. Returns ------- From e1bb16f67a481faff25e6a6ec942940b6de431c4 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 6 Dec 2021 15:24:16 +0000 Subject: [PATCH 08/31] Trigger CI From e924f93496b0b204a38d1603f8ac8c785a3c2ee7 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 6 Dec 2021 16:03:38 +0000 Subject: [PATCH 09/31] Trigger CI From 96af74daf145596c3a471aeee72dd8450fef1690 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Tue, 7 Dec 2021 09:11:07 +0000 Subject: [PATCH 10/31] Update v1.4.0.rst --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index bff5c36b50250..de33891ba5a38 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -229,6 +229,7 @@ Other enhancements - :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`) - Implemented :meth:`IntervalArray.min`, :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`44746`) - :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`) +- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`) - @@ -699,7 +700,6 @@ Indexing - Bug in indexing on columns with ``loc`` or ``iloc`` using a slice with a negative step with ``ExtensionDtype`` columns incorrectly raising (:issue:`44551`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`) -- Bug in :meth:`Series.reset_index` on a MultiIndex with duplicate levels raises a ValueError (:issue:`44410`) - Missing From 0e90ae90fe254d9ac6bd1404158806cb44f17e7b Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Tue, 7 Dec 2021 09:23:04 +0000 Subject: [PATCH 11/31] Update test_reset_index.py --- pandas/tests/series/methods/test_reset_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 8d8f34f26eb7a..e7340aaf376e5 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -200,9 +200,9 @@ def test_column_name_duplicates(names, expected_names, allow_duplicates): # GH#44755 reset_index with duplicate column labels s = Series([1], index=MultiIndex.from_arrays([[1], [1]], names=names)) if allow_duplicates: - result = s.reset_index(allow_duplicates=allow_duplicates) + result = s.reset_index(allow_duplicates=True) expected = DataFrame([[1, 1, 1]], columns=expected_names + [0]) tm.assert_frame_equal(result, expected) else: with pytest.raises(ValueError, match="cannot insert"): - s.reset_index(allow_duplicates=allow_duplicates) + s.reset_index() From 01ad5388d69aa1250e8cc1cac53ef6d044216f82 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Fri, 10 Dec 2021 15:45:50 +0000 Subject: [PATCH 12/31] Make separate allow_duplicates test --- .../tests/frame/methods/test_reset_index.py | 52 ++++++++++++------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 02cb0e2ab493b..9a31ad9f367ee 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -25,6 +25,12 @@ import pandas._testing as tm +@pytest.fixture() +def multiindex_df(): + levels = [["A", ""], ["B", "b"]] + return DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + + class TestResetIndex: def test_set_reset(self): @@ -368,41 +374,31 @@ def test_reset_index_range(self): ) tm.assert_frame_equal(result, expected) - def test_reset_index_multiindex_columns(self): - levels = [["A", ""], ["B", "b"]] - df = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) - result = df[["B"]].rename_axis("A").reset_index() - tm.assert_frame_equal(result, df) + def test_reset_index_multiindex_columns(self, multiindex_df): + result = multiindex_df[["B"]].rename_axis("A").reset_index() + tm.assert_frame_equal(result, multiindex_df) # GH#16120: already existing column msg = r"cannot insert \('A', ''\), already exists" with pytest.raises(ValueError, match=msg): - df.rename_axis("A").reset_index() - - # GH#44755 reset_index with duplicate column labels - result = df.rename_axis("A").reset_index(allow_duplicates=True) - levels = [["A", ""], ["A", ""], ["B", "b"]] - expected = DataFrame( - [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels) - ) - tm.assert_frame_equal(result, expected) + multiindex_df.rename_axis("A").reset_index() # GH#16164: multiindex (tuple) full key - result = df.set_index([("A", "")]).reset_index() - tm.assert_frame_equal(result, df) + result = multiindex_df.set_index([("A", "")]).reset_index() + tm.assert_frame_equal(result, multiindex_df) # with additional (unnamed) index level idx_col = DataFrame( [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")]) ) - expected = pd.concat([idx_col, df[[("B", "b"), ("A", "")]]], axis=1) - result = df.set_index([("B", "b")], append=True).reset_index() + expected = pd.concat([idx_col, multiindex_df[[("B", "b"), ("A", "")]]], axis=1) + result = multiindex_df.set_index([("B", "b")], append=True).reset_index() tm.assert_frame_equal(result, expected) # with index name which is a too long tuple... msg = "Item must have length equal to number of levels." with pytest.raises(ValueError, match=msg): - df.rename_axis([("C", "c", "i")]).reset_index() + multiindex_df.rename_axis([("C", "c", "i")]).reset_index() # or too short... levels = [["A", "a", ""], ["B", "b", "i"]] @@ -428,6 +424,24 @@ def test_reset_index_multiindex_columns(self): result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("allow_duplicates", ["absent", False, None, True]) + def test_reset_index_duplicate_columns(self, multiindex_df, allow_duplicates): + # GH#44755 reset_index with duplicate column labels + if allow_duplicates is False or allow_duplicates == "absent": + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + if allow_duplicates == "absent": + multiindex_df.rename_axis("A").reset_index() + else: + multiindex_df.rename_axis("A").reset_index(allow_duplicates=False) + else: + result = multiindex_df.rename_axis("A").reset_index(allow_duplicates=True) + levels = [["A", ""], ["A", ""], ["B", "b"]] + expected = DataFrame( + [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels) + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 From 9da58eda43ad509c8e38d5accf2af83e2e4365df Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Fri, 10 Dec 2021 18:30:43 +0000 Subject: [PATCH 13/31] Update test_reset_index.py --- .../tests/frame/methods/test_reset_index.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 9a31ad9f367ee..65d5ba493a98b 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -424,18 +424,27 @@ def test_reset_index_multiindex_columns(self, multiindex_df): result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("flag", [False, True]) @pytest.mark.parametrize("allow_duplicates", ["absent", False, None, True]) - def test_reset_index_duplicate_columns(self, multiindex_df, allow_duplicates): + def test_reset_index_duplicate_columns(self, multiindex_df, flag, allow_duplicates): # GH#44755 reset_index with duplicate column labels - if allow_duplicates is False or allow_duplicates == "absent": - msg = r"cannot insert \('A', ''\), already exists" + df = multiindex_df.rename_axis("A") + df = df.set_flags(allows_duplicate_labels=flag) + + if ( + flag and (allow_duplicates is False or allow_duplicates == "absent") + ) or not flag: + if allow_duplicates is True and flag is False: + msg = "Cannot specify 'allow_duplicates=True' when 'self.flags.allows_duplicate_labels' is False" + else: + msg = r"cannot insert \('A', ''\), already exists" with pytest.raises(ValueError, match=msg): if allow_duplicates == "absent": - multiindex_df.rename_axis("A").reset_index() + df.reset_index() else: - multiindex_df.rename_axis("A").reset_index(allow_duplicates=False) + df.reset_index(allow_duplicates=allow_duplicates) else: - result = multiindex_df.rename_axis("A").reset_index(allow_duplicates=True) + result = df.reset_index(allow_duplicates=allow_duplicates) levels = [["A", ""], ["A", ""], ["B", "b"]] expected = DataFrame( [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels) From 9d988a0433e8ca5a4ea113da37e67b29a6df4c44 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Fri, 10 Dec 2021 18:46:06 +0000 Subject: [PATCH 14/31] Update test_reset_index.py --- pandas/tests/frame/methods/test_reset_index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 65d5ba493a98b..ba8b91ee55067 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -435,7 +435,8 @@ def test_reset_index_duplicate_columns(self, multiindex_df, flag, allow_duplicat flag and (allow_duplicates is False or allow_duplicates == "absent") ) or not flag: if allow_duplicates is True and flag is False: - msg = "Cannot specify 'allow_duplicates=True' when 'self.flags.allows_duplicate_labels' is False" + msg = "Cannot specify 'allow_duplicates=True' when " + "'self.flags.allows_duplicate_labels' is False" else: msg = r"cannot insert \('A', ''\), already exists" with pytest.raises(ValueError, match=msg): From 2b37ab314171370cec7166376cd88d9d3121e623 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sat, 11 Dec 2021 07:57:51 +0000 Subject: [PATCH 15/31] Trigger CI From e27df829eb62a372167d4e06d818326b1cf88705 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sun, 12 Dec 2021 12:26:14 +0000 Subject: [PATCH 16/31] Change from None to use_flag --- pandas/core/frame.py | 30 +++++++++++-------- .../tests/frame/methods/test_reset_index.py | 7 ++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9b0a0718d4627..947507650ec21 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5571,7 +5571,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | None = ..., + allow_duplicates: bool | str = ..., ) -> DataFrame: ... @@ -5583,7 +5583,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | None = ..., + allow_duplicates: bool | str = ..., ) -> None: ... @@ -5595,7 +5595,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | None = ..., + allow_duplicates: bool | str = ..., ) -> None: ... @@ -5607,7 +5607,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | None = ..., + allow_duplicates: bool | str = ..., ) -> None: ... @@ -5618,7 +5618,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | None = ..., + allow_duplicates: bool | str = ..., ) -> None: ... @@ -5630,7 +5630,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | None = ..., + allow_duplicates: bool | str = ..., ) -> DataFrame | None: ... @@ -5642,7 +5642,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Hashable = "", - allow_duplicates: bool | None = False, + allow_duplicates: bool | str = False, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5668,9 +5668,9 @@ def reset_index( col_fill : object, default '' If the columns have multiple levels, determines how the other levels are named. If None then the index name is repeated. - allow_duplicates : bool or None, default False + allow_duplicates : bool or str, default False Allow duplicate column labels to be created. - If None take value from self.flags.allows_duplicate_labels. + If "use_flag" take value from self.flags.allows_duplicate_labels. Returns ------- @@ -5795,6 +5795,9 @@ class max type else: new_obj = self.copy() + if allow_duplicates not in [False, True, "use_flag"]: + raise ValueError(f"Illegal allow_duplicates value: {allow_duplicates}") + new_index = default_index(len(new_obj)) if level is not None: if not isinstance(level, (tuple, list)): @@ -5817,6 +5820,10 @@ class max type to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) + + if allow_duplicates == "use_flag": + allow_duplicates = self.flags.allows_duplicate_labels + for i, (lev, lab) in reversed(list(enumerate(to_insert))): if level is not None and i not in level: continue @@ -5848,14 +5855,11 @@ class max type level_values, lab, allow_fill=True, fill_value=lev._na_value ) - if allow_duplicates is None: - allow_duplicates = self.flags.allows_duplicate_labels - new_obj.insert( 0, name, level_values, - allow_duplicates=allow_duplicates, + allow_duplicates=cast(bool, allow_duplicates), ) new_obj.index = new_index diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index ba8b91ee55067..8e822051e1c98 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -425,7 +425,7 @@ def test_reset_index_multiindex_columns(self, multiindex_df): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("flag", [False, True]) - @pytest.mark.parametrize("allow_duplicates", ["absent", False, None, True]) + @pytest.mark.parametrize("allow_duplicates", ["absent", False, "use_flag", True]) def test_reset_index_duplicate_columns(self, multiindex_df, flag, allow_duplicates): # GH#44755 reset_index with duplicate column labels df = multiindex_df.rename_axis("A") @@ -452,6 +452,11 @@ def test_reset_index_duplicate_columns(self, multiindex_df, flag, allow_duplicat ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("allow_duplicates", [None, "bad value"]) + def test_reset_index_allow_duplicates_check(self, multiindex_df, allow_duplicates): + with pytest.raises(ValueError, match="Illegal allow_duplicates value"): + multiindex_df.reset_index(allow_duplicates=allow_duplicates) + @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 From 749838ca48e8243d6c6a31091fa6ba695417581e Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sun, 12 Dec 2021 19:16:55 +0000 Subject: [PATCH 17/31] Get rid of use_flag altogether --- pandas/core/frame.py | 26 ++++++++----------- .../tests/frame/methods/test_reset_index.py | 10 +++---- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 947507650ec21..6e73cfd5d9e0b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5571,7 +5571,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | str = ..., + allow_duplicates: bool = ..., ) -> DataFrame: ... @@ -5583,7 +5583,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | str = ..., + allow_duplicates: bool = ..., ) -> None: ... @@ -5595,7 +5595,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | str = ..., + allow_duplicates: bool = ..., ) -> None: ... @@ -5607,7 +5607,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | str = ..., + allow_duplicates: bool = ..., ) -> None: ... @@ -5618,7 +5618,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | str = ..., + allow_duplicates: bool = ..., ) -> None: ... @@ -5630,7 +5630,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | str = ..., + allow_duplicates: bool = ..., ) -> DataFrame | None: ... @@ -5642,7 +5642,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Hashable = "", - allow_duplicates: bool | str = False, + allow_duplicates: bool = False, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5668,9 +5668,9 @@ def reset_index( col_fill : object, default '' If the columns have multiple levels, determines how the other levels are named. If None then the index name is repeated. - allow_duplicates : bool or str, default False + allow_duplicates : bool, default False Allow duplicate column labels to be created. - If "use_flag" take value from self.flags.allows_duplicate_labels. + Does not work if self.flags.allows_duplicate_labels is False. Returns ------- @@ -5795,8 +5795,7 @@ class max type else: new_obj = self.copy() - if allow_duplicates not in [False, True, "use_flag"]: - raise ValueError(f"Illegal allow_duplicates value: {allow_duplicates}") + allow_duplicates = validate_bool_kwarg(allow_duplicates, "allow_duplicates") new_index = default_index(len(new_obj)) if level is not None: @@ -5821,9 +5820,6 @@ class max type multi_col = isinstance(self.columns, MultiIndex) - if allow_duplicates == "use_flag": - allow_duplicates = self.flags.allows_duplicate_labels - for i, (lev, lab) in reversed(list(enumerate(to_insert))): if level is not None and i not in level: continue @@ -5859,7 +5855,7 @@ class max type 0, name, level_values, - allow_duplicates=cast(bool, allow_duplicates), + allow_duplicates=allow_duplicates, ) new_obj.index = new_index diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 8e822051e1c98..8a700cc4d4945 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -425,15 +425,13 @@ def test_reset_index_multiindex_columns(self, multiindex_df): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("flag", [False, True]) - @pytest.mark.parametrize("allow_duplicates", ["absent", False, "use_flag", True]) + @pytest.mark.parametrize("allow_duplicates", ["absent", None, False, True]) def test_reset_index_duplicate_columns(self, multiindex_df, flag, allow_duplicates): # GH#44755 reset_index with duplicate column labels df = multiindex_df.rename_axis("A") df = df.set_flags(allows_duplicate_labels=flag) - if ( - flag and (allow_duplicates is False or allow_duplicates == "absent") - ) or not flag: + if (flag and allow_duplicates is not True) or not flag: if allow_duplicates is True and flag is False: msg = "Cannot specify 'allow_duplicates=True' when " "'self.flags.allows_duplicate_labels' is False" @@ -452,9 +450,9 @@ def test_reset_index_duplicate_columns(self, multiindex_df, flag, allow_duplicat ) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("allow_duplicates", [None, "bad value"]) + @pytest.mark.parametrize("allow_duplicates", ["bad value"]) def test_reset_index_allow_duplicates_check(self, multiindex_df, allow_duplicates): - with pytest.raises(ValueError, match="Illegal allow_duplicates value"): + with pytest.raises(ValueError, match="expected type bool"): multiindex_df.reset_index(allow_duplicates=allow_duplicates) @pytest.mark.filterwarnings("ignore:Timestamp.freq is deprecated:FutureWarning") From c7cf48320b7f43b89c10eaa80c6cd4f6ebb1943b Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Sun, 12 Dec 2021 19:22:10 +0000 Subject: [PATCH 18/31] Update frame.py --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6e73cfd5d9e0b..68a1efab5d2da 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5819,7 +5819,6 @@ class max type to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) - for i, (lev, lab) in reversed(list(enumerate(to_insert))): if level is not None and i not in level: continue From 7711474fd9f30d2fb8011ce715049368e4cf391d Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Tue, 14 Dec 2021 11:01:19 +0000 Subject: [PATCH 19/31] Added version and improved tests --- pandas/core/frame.py | 8 ++++- pandas/core/series.py | 2 ++ .../tests/frame/methods/test_reset_index.py | 36 +++++++++++-------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 68a1efab5d2da..cc2ab6791db39 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5670,13 +5670,19 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, default False Allow duplicate column labels to be created. - Does not work if self.flags.allows_duplicate_labels is False. + + .. versionadded:: 1.4.0 Returns ------- DataFrame or None DataFrame with the new index or None if ``inplace=True``. + Raises + ------ + ValueError + If allow_duplicates and not self.flags.allows_duplicate_labels. + See Also -------- DataFrame.set_index : Opposite of reset_index. diff --git a/pandas/core/series.py b/pandas/core/series.py index daff4c8d7426e..c131077229ec2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1382,6 +1382,8 @@ def reset_index( allow_duplicates : bool, default False Allow duplicate column labels to be created. + .. versionadded:: 1.4.0 + Returns ------- Series or DataFrame or None diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 8a700cc4d4945..4bfcdc91380ff 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -425,30 +425,38 @@ def test_reset_index_multiindex_columns(self, multiindex_df): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("flag", [False, True]) - @pytest.mark.parametrize("allow_duplicates", ["absent", None, False, True]) - def test_reset_index_duplicate_columns(self, multiindex_df, flag, allow_duplicates): + @pytest.mark.parametrize("allow_duplicates", [False, True]) + def test_reset_index_duplicate_columns_allow( + self, multiindex_df, flag, allow_duplicates + ): # GH#44755 reset_index with duplicate column labels df = multiindex_df.rename_axis("A") df = df.set_flags(allows_duplicate_labels=flag) - if (flag and allow_duplicates is not True) or not flag: - if allow_duplicates is True and flag is False: - msg = "Cannot specify 'allow_duplicates=True' when " - "'self.flags.allows_duplicate_labels' is False" - else: - msg = r"cannot insert \('A', ''\), already exists" - with pytest.raises(ValueError, match=msg): - if allow_duplicates == "absent": - df.reset_index() - else: - df.reset_index(allow_duplicates=allow_duplicates) - else: + if flag and allow_duplicates: result = df.reset_index(allow_duplicates=allow_duplicates) levels = [["A", ""], ["A", ""], ["B", "b"]] expected = DataFrame( [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels) ) tm.assert_frame_equal(result, expected) + else: + if not flag and allow_duplicates: + msg = "Cannot specify 'allow_duplicates=True' when " + "'self.flags.allows_duplicate_labels' is False" + else: + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.reset_index(allow_duplicates=allow_duplicates) + + @pytest.mark.parametrize("flag", [False, True]) + def test_reset_index_duplicate_columns_default(self, multiindex_df, flag): + df = multiindex_df.rename_axis("A") + df = df.set_flags(allows_duplicate_labels=flag) + + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.reset_index() @pytest.mark.parametrize("allow_duplicates", ["bad value"]) def test_reset_index_allow_duplicates_check(self, multiindex_df, allow_duplicates): From 0ecf6fdeaabc4d77b2b1a6e6dc36b98db47e04e8 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Tue, 14 Dec 2021 11:16:22 +0000 Subject: [PATCH 20/31] Trigger CI From 250999ad38ba20bfa8d972a0dba13676b0a3795a Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Tue, 14 Dec 2021 23:33:27 +0000 Subject: [PATCH 21/31] Trigger CI From 14a07e142d8347e478f1858014e89f77aa4bfc74 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Wed, 15 Dec 2021 10:40:40 +0000 Subject: [PATCH 22/31] Trigger CI From 13372834920f0d5aba65d4ca15f3d1d40126c941 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 20 Dec 2021 10:50:27 +0000 Subject: [PATCH 23/31] Trigger CI From bb54268e46cb914782337c5df8e06938b0029fd3 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Fri, 24 Dec 2021 10:43:49 +0000 Subject: [PATCH 24/31] allow_duplicates lib.no_default --- pandas/core/frame.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4ebada6a46982..c62d1981a723e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4378,7 +4378,7 @@ def insert( loc: int, column: Hashable, value: Scalar | AnyArrayLike, - allow_duplicates: bool = False, + allow_duplicates: bool | lib.NoDefault = lib.no_default, ) -> None: """ Insert column into DataFrame at specified location. @@ -4393,7 +4393,7 @@ def insert( column : str, number, or hashable object Label of the inserted column. value : Scalar, Series, or array-like - allow_duplicates : bool, optional default False + allow_duplicates : bool, optional, default lib.no_default See Also -------- @@ -4425,6 +4425,8 @@ def insert( 0 NaN 100 1 99 3 1 5.0 100 2 99 4 """ + if allow_duplicates is lib.no_default: + allow_duplicates = False if allow_duplicates and not self.flags.allows_duplicate_labels: raise ValueError( "Cannot specify 'allow_duplicates=True' when " @@ -5638,7 +5640,7 @@ def reset_index( inplace: bool = False, col_level: Hashable = 0, col_fill: Hashable = "", - allow_duplicates: bool = False, + allow_duplicates: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5664,7 +5666,7 @@ def reset_index( col_fill : object, default '' If the columns have multiple levels, determines how the other levels are named. If None then the index name is repeated. - allow_duplicates : bool, default False + allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. .. versionadded:: 1.4.0 @@ -5796,8 +5798,8 @@ class max type new_obj = self else: new_obj = self.copy() - - allow_duplicates = validate_bool_kwarg(allow_duplicates, "allow_duplicates") + if allow_duplicates is not lib.no_default: + allow_duplicates = validate_bool_kwarg(allow_duplicates, "allow_duplicates") new_index = default_index(len(new_obj)) if level is not None: From d21846b52371e9d7a677602a34357f1f04897a57 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Fri, 24 Dec 2021 12:20:03 +0000 Subject: [PATCH 25/31] Trigger CI From 540b3079b71c03700203bbe383dd73afa2a6f6c3 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 27 Dec 2021 09:31:40 +0000 Subject: [PATCH 26/31] Correct overloads --- pandas/core/frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7759708281991..1c304c422cdb1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5567,7 +5567,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool = ..., + allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., ) -> DataFrame: ... @@ -5579,7 +5579,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool = ..., + allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., ) -> None: ... @@ -5591,7 +5591,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool = ..., + allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., ) -> None: ... @@ -5603,7 +5603,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool = ..., + allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., ) -> None: ... @@ -5614,7 +5614,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool = ..., + allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., ) -> None: ... @@ -5626,7 +5626,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool = ..., + allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., ) -> DataFrame | None: ... From a59644a2f66152452ea8c2ca9169b4eb621d4afd Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 27 Dec 2021 09:35:49 +0000 Subject: [PATCH 27/31] Update frame.py --- pandas/core/frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1c304c422cdb1..c3a5fc8e1e54b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5567,7 +5567,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., + allow_duplicates: bool | lib.no_default = ..., ) -> DataFrame: ... @@ -5579,7 +5579,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., + allow_duplicates: bool | lib.no_default = ..., ) -> None: ... @@ -5591,7 +5591,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., + allow_duplicates: bool | lib.no_default = ..., ) -> None: ... @@ -5603,7 +5603,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., + allow_duplicates: bool | lib.no_default = ..., ) -> None: ... @@ -5614,7 +5614,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., + allow_duplicates: bool | lib.no_default = ..., ) -> None: ... @@ -5626,7 +5626,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.NoDefault = lib.no_default = ..., + allow_duplicates: bool | lib.no_default = ..., ) -> DataFrame | None: ... From 67a5956670f5ad752620ca4639387ccaf957ecad Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 27 Dec 2021 22:05:07 +0000 Subject: [PATCH 28/31] Update frame.py --- pandas/core/frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c3a5fc8e1e54b..ca51006d4e5df 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5567,7 +5567,7 @@ def reset_index( inplace: Literal[False] = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.no_default = ..., + allow_duplicates: bool | lib.NoDefault = ..., ) -> DataFrame: ... @@ -5579,7 +5579,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.no_default = ..., + allow_duplicates: bool | lib.NoDefault = ..., ) -> None: ... @@ -5591,7 +5591,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.no_default = ..., + allow_duplicates: bool | lib.NoDefault = ..., ) -> None: ... @@ -5603,7 +5603,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.no_default = ..., + allow_duplicates: bool | lib.NoDefault = ..., ) -> None: ... @@ -5614,7 +5614,7 @@ def reset_index( inplace: Literal[True], col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.no_default = ..., + allow_duplicates: bool | lib.NoDefault = ..., ) -> None: ... @@ -5626,7 +5626,7 @@ def reset_index( inplace: bool = ..., col_level: Hashable = ..., col_fill: Hashable = ..., - allow_duplicates: bool | lib.no_default = ..., + allow_duplicates: bool | lib.NoDefault = ..., ) -> DataFrame | None: ... From 04b3a381a7535f60622077185fbcf7fc07c3c22f Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 17 Jan 2022 15:53:47 +0000 Subject: [PATCH 29/31] Removed docstring Raises --- pandas/core/frame.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4d5feaa434c7..962a105c9e290 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5696,11 +5696,6 @@ def reset_index( DataFrame or None DataFrame with the new index or None if ``inplace=True``. - Raises - ------ - ValueError - If allow_duplicates and not self.flags.allows_duplicate_labels. - See Also -------- DataFrame.set_index : Opposite of reset_index. From 2f941709b770332ea1067833cbbf68fca004a5f1 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 17 Jan 2022 16:04:13 +0000 Subject: [PATCH 30/31] Version to 1.5 --- doc/source/whatsnew/v1.4.0.rst | 1 - doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/frame.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 47692fb9a7aff..47427620c8ece 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -220,7 +220,6 @@ Other enhancements - :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`) - Implemented :meth:`IntervalArray.min`, :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`44746`) - :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`) -- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`) - :meth:`read_json` can now parse unsigned long long integers (:issue:`26068`) - :meth:`DataFrame.take` now raises a ``TypeError`` when passed a scalar for the indexer (:issue:`42875`) - :meth:`is_list_like` now identifies duck-arrays as list-like unless ``.ndim == 0`` (:issue:`35131`) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b259f182a1197..7a7736225f02d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -34,6 +34,7 @@ Other enhancements - :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`) - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) - :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) +- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 962a105c9e290..56d8048281939 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5689,7 +5689,7 @@ def reset_index( allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. - .. versionadded:: 1.4.0 + .. versionadded:: 1.5.0 Returns ------- From 14edbdb7cdb12593bc1c16334015e5d477264212 Mon Sep 17 00:00:00 2001 From: John Zangwill Date: Mon, 24 Jan 2022 09:56:50 +0000 Subject: [PATCH 31/31] Update series.py --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 65142778a0c31..399d9abcd8a8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1392,7 +1392,7 @@ def reset_index( allow_duplicates : bool, default False Allow duplicate column labels to be created. - .. versionadded:: 1.4.0 + .. versionadded:: 1.5.0 Returns -------