From 46821e823a6041c96c63f0c2b37ad79cbebc0351 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 13:14:58 -0800 Subject: [PATCH 01/11] BUG: GH37635 Previously, the variable "cols" was created solely to concatenate the lists "index" and "columns" when the "values" parameter was left as None. It did so in a way that it modified the variable passed as "index" using list.extend(), affecting the caller's namespace. This change simply uses an anonymous list in place of the variable "cols" --- pandas/core/reshape/pivot.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 8fae01cb30d3d..35243cddf0d1e 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -447,13 +447,12 @@ def pivot( if values is None: if index is not None: - cols = com.convert_to_list_like(index) + index = com.convert_to_list_like(index) else: - cols = [] - cols.extend(columns) + index = [] append = index is None - indexed = data.set_index(cols, append=append) + indexed = data.set_index(index + columns, append=append) else: if index is None: index = [Series(data.index, name=data.index.name)] From 9a7a60ab3d7e9ab6dbcfdd8a7c4230b6794e6b4d Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 13:43:51 -0800 Subject: [PATCH 02/11] DOC: Documentation for GH37635 This is a amended commit to solve pre-commit test: "rst ``code`` is two backticks" --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..44472157f16c9 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -553,6 +553,7 @@ Reshaping - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) - Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns both multiindexed (:issue:`36360`) +- Bug in :meth:``DataFrame.pivot`` modified caller's ``index`` parameter when ``columns`` was passed but ``values`` was not (:issue:`37635`) - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) From edebfaa1fe6162cd6e16b61681a23f422f30872a Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 19:43:56 -0800 Subject: [PATCH 03/11] TST: Added test for GH37635 --- pandas/tests/reshape/test_pivot.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index adab64577ee7a..d3fbcc6bca2ad 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2153,3 +2153,20 @@ def test_pivot_index_none(self): expected.columns.name = "columns" tm.assert_frame_equal(result, expected) + + def test_pivot_index_list_values_none(self): + # Tests when index is a list and values None. See GH#37635 + data = { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + index = ["index"] + columns = ["columns"] + frame = DataFrame(data) + frame.pivot(index=index, columns=columns, values=None) + + expected_ser = Series(["index"]) + result_ser = Series(index) + + tm.assert_series_equal(result_ser, expected_ser) From 6fbebe4b4641522ec7860b549a3cdce57d1c2847 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 20:48:07 -0800 Subject: [PATCH 04/11] TST: correct GH37635 test for code review * Use test case from the issue * Use standard assert instead of making series and using pandas._testing --- pandas/tests/reshape/test_pivot.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index d3fbcc6bca2ad..9cd0bd8b4df25 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2156,17 +2156,15 @@ def test_pivot_index_none(self): def test_pivot_index_list_values_none(self): # Tests when index is a list and values None. See GH#37635 - data = { - "index": ["A", "B", "C", "C", "B", "A"], - "columns": ["One", "One", "One", "Two", "Two", "Two"], - "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], - } - index = ["index"] - columns = ["columns"] - frame = DataFrame(data) + frame = pd.DataFrame({ + "lev1": [1, 1, 1, 2, 2, 2], + "lev2": [1, 1, 2, 1, 1, 2], + "lev3": [1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6], + "values": [0, 1, 2, 3, 4, 5] + }) + index = ["lev1", "lev2"] + columns = ["lev3"] frame.pivot(index=index, columns=columns, values=None) - expected_ser = Series(["index"]) - result_ser = Series(index) - - tm.assert_series_equal(result_ser, expected_ser) + assert index == ["lev1", "lev2"] \ No newline at end of file From d673a9d3f9e0208cfa92addcfc804b94e1e71e60 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 20:52:45 -0800 Subject: [PATCH 05/11] DOC: Corrected whats_new for GH37635 Now I think I've got the hang of when to use one backtick versus two --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 44472157f16c9..3da46683cb3ae 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -553,7 +553,7 @@ Reshaping - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) - Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns both multiindexed (:issue:`36360`) -- Bug in :meth:``DataFrame.pivot`` modified caller's ``index`` parameter when ``columns`` was passed but ``values`` was not (:issue:`37635`) +- Bug in :meth:`DataFrame.pivot` modified caller's ``index`` parameter when ``columns`` was passed but ``values`` was not (:issue:`37635`) - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) From 5833b33527d535534b1eb7e5137b19436037b381 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 21:10:08 -0800 Subject: [PATCH 06/11] CLN: newline at end of file --- pandas/tests/reshape/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9cd0bd8b4df25..51c700196d5fd 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2167,4 +2167,4 @@ def test_pivot_index_list_values_none(self): columns = ["lev3"] frame.pivot(index=index, columns=columns, values=None) - assert index == ["lev1", "lev2"] \ No newline at end of file + assert index == ["lev1", "lev2"] From 51acab098626f1b70a7c797fc25c32ee894b41f0 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 21:41:00 -0800 Subject: [PATCH 07/11] BUG: Fixed regression in TestPivot.test_pivot_index_none * My previous change caused the append = index is None to never set append = True * black pandas fixed my KR braces in test_pivot.py * Also black pandas fixed my KR braces on test_pivot.py --- pandas/core/reshape/pivot.py | 6 +++--- pandas/tests/reshape/test_pivot.py | 16 +++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 35243cddf0d1e..c1198cdfcda81 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -447,12 +447,12 @@ def pivot( if values is None: if index is not None: - index = com.convert_to_list_like(index) + cols = com.convert_to_list_like(index) else: - index = [] + cols = [] append = index is None - indexed = data.set_index(index + columns, append=append) + indexed = data.set_index(cols + columns, append=append) else: if index is None: index = [Series(data.index, name=data.index.name)] diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 51c700196d5fd..d61a092c3fd8d 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2156,13 +2156,15 @@ def test_pivot_index_none(self): def test_pivot_index_list_values_none(self): # Tests when index is a list and values None. See GH#37635 - frame = pd.DataFrame({ - "lev1": [1, 1, 1, 2, 2, 2], - "lev2": [1, 1, 2, 1, 1, 2], - "lev3": [1, 2, 1, 2, 1, 2], - "lev4": [1, 2, 3, 4, 5, 6], - "values": [0, 1, 2, 3, 4, 5] - }) + frame = pd.DataFrame( + { + "lev1": [1, 1, 1, 2, 2, 2], + "lev2": [1, 1, 2, 1, 1, 2], + "lev3": [1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6], + "values": [0, 1, 2, 3, 4, 5], + } + ) index = ["lev1", "lev2"] columns = ["lev3"] frame.pivot(index=index, columns=columns, values=None) From 752ca204c955b6290b305f8392727aad690eb17d Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 22:11:54 -0800 Subject: [PATCH 08/11] TST: Code review on GH37635 * Naming the test * Commenting the test * `frame` is now named `df` * Added check that the frame actually pivots correctly * Also: solved inconsistent pandas namespace in tests (precommit CI) --- pandas/tests/reshape/test_pivot.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index d61a092c3fd8d..93d474e54a497 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2154,9 +2154,9 @@ def test_pivot_index_none(self): expected.columns.name = "columns" tm.assert_frame_equal(result, expected) - def test_pivot_index_list_values_none(self): - # Tests when index is a list and values None. See GH#37635 - frame = pd.DataFrame( + def test_pivot_index_list_values_none_modifies_args(self): + # GH37635 + df = DataFrame( { "lev1": [1, 1, 1, 2, 2, 2], "lev2": [1, 1, 2, 1, 1, 2], @@ -2167,6 +2167,23 @@ def test_pivot_index_list_values_none(self): ) index = ["lev1", "lev2"] columns = ["lev3"] - frame.pivot(index=index, columns=columns, values=None) + pivoted = df.pivot(index=index, columns=columns, values=None) + + expected = DataFrame( + { + "a": [1.0, 3.0, 5.0, float("nan")], + "b": [2.0, float("nan"), 4.0, 6.0], + "c": [0.0, 2.0, 4.0, float("nan")], + "d": [1.0, float("nan"), 3.0, 5.0], + } + ) + expected.index = MultiIndex.from_arrays( + [(1, 1, 2, 2), (1, 2, 1, 2)], names=["lev1", "lev2"] + ) + expected.columns = MultiIndex.from_arrays( + [("lev4", "lev4", "values", "values"), (1, 2, 1, 2)], names=[None, "lev3"] + ) + + tm.assert_frame_equal(pivoted, expected) assert index == ["lev1", "lev2"] From 4cff34b1391d59589d11df4012c7e0178027d3c0 Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Wed, 11 Nov 2020 23:13:59 -0800 Subject: [PATCH 09/11] TST: Code review on GH37635 --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/reshape/test_pivot.py | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3da46683cb3ae..fba430ac1b5b6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -553,7 +553,7 @@ Reshaping - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) - Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns both multiindexed (:issue:`36360`) -- Bug in :meth:`DataFrame.pivot` modified caller's ``index`` parameter when ``columns`` was passed but ``values`` was not (:issue:`37635`) +- Bug in :meth:`DataFrame.pivot` modified ``index`` argument when ``columns`` was passed but ``values`` was not (:issue:`37635`) - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 93d474e54a497..88c489dc4a67f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2170,18 +2170,21 @@ def test_pivot_index_list_values_none_modifies_args(self): pivoted = df.pivot(index=index, columns=columns, values=None) expected = DataFrame( - { - "a": [1.0, 3.0, 5.0, float("nan")], - "b": [2.0, float("nan"), 4.0, 6.0], - "c": [0.0, 2.0, 4.0, float("nan")], - "d": [1.0, float("nan"), 3.0, 5.0], - } - ) - expected.index = MultiIndex.from_arrays( - [(1, 1, 2, 2), (1, 2, 1, 2)], names=["lev1", "lev2"] - ) - expected.columns = MultiIndex.from_arrays( - [("lev4", "lev4", "values", "values"), (1, 2, 1, 2)], names=[None, "lev3"] + np.array( + [ + [1.0, 2.0, 0.0, 1.0], + [3.0, np.nan, 2.0, np.nan], + [5.0, 4.0, 4.0, 3.0], + [np.nan, 6.0, np.nan, 5.0], + ] + ), + index=MultiIndex.from_arrays( + [(1, 1, 2, 2), (1, 2, 1, 2)], names=["lev1", "lev2"] + ), + columns=MultiIndex.from_arrays( + [("lev4", "lev4", "values", "values"), (1, 2, 1, 2)], + names=[None, "lev3"], + ), ) tm.assert_frame_equal(pivoted, expected) From 2e249c9fbb68e1e5ee93eab5ec62ac0ef5c3626b Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Thu, 12 Nov 2020 10:47:55 -0800 Subject: [PATCH 10/11] TST: Code review on GH37635 * renamed test variable to 'result' * renamed test --- pandas/tests/reshape/test_pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 88c489dc4a67f..db568b3cb6aa6 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2167,7 +2167,7 @@ def test_pivot_index_list_values_none_modifies_args(self): ) index = ["lev1", "lev2"] columns = ["lev3"] - pivoted = df.pivot(index=index, columns=columns, values=None) + result = df.pivot(index=index, columns=columns, values=None) expected = DataFrame( np.array( @@ -2187,6 +2187,6 @@ def test_pivot_index_list_values_none_modifies_args(self): ), ) - tm.assert_frame_equal(pivoted, expected) + tm.assert_frame_equal(result, expected) assert index == ["lev1", "lev2"] From 9e9ed00c5e6003e9d54c6c8c95709a2a8dc7997f Mon Sep 17 00:00:00 2001 From: Jake Stevens-Haas Date: Fri, 13 Nov 2020 10:12:35 -0800 Subject: [PATCH 11/11] TST: code review GH37635 * Added test that columns variable is immutable * Changed name of test per code review (missed in prev commit) --- pandas/tests/reshape/test_pivot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index db568b3cb6aa6..e08876226cbc8 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2154,7 +2154,7 @@ def test_pivot_index_none(self): expected.columns.name = "columns" tm.assert_frame_equal(result, expected) - def test_pivot_index_list_values_none_modifies_args(self): + def test_pivot_index_list_values_none_immutable_args(self): # GH37635 df = DataFrame( { @@ -2190,3 +2190,4 @@ def test_pivot_index_list_values_none_modifies_args(self): tm.assert_frame_equal(result, expected) assert index == ["lev1", "lev2"] + assert columns == ["lev3"]