From faded183c8d5502f72100764c21ebccfc277ad26 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Mon, 14 Apr 2025 15:48:44 -0500 Subject: [PATCH 01/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Added two tests, :func:`test_pivot_table_values_in_columns` and :func:`test_pivot_table_values_in_index`, to ensure that the `values` param is still used when the argument is shared between the `columns` and `values` params, and `index` and `values` params. --- pandas/tests/reshape/test_pivot_multilevel.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index 2c9d54c3db72c..b67d42b42bddd 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -250,3 +250,49 @@ def test_pivot_df_multiindex_index_none(): columns=Index(["label1", "label2"], name="label"), ) tm.assert_frame_equal(result, expected) + + +def test_pivot_table_values_in_columns(): + data = [ + ["A", 1, 50, -1], + ["B", 1, 100, -2], + ["A", 2, 100, -2], + ["B", 2, 200, -4], + ] + df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"]) + result = df.pivot_table(values="value", index="index", columns=["col", "value"]) + nan = np.nan + e_data = [ + [50.0, nan, 100.0, nan], + [nan, 100.0, nan, 200.0], + ] + e_index = Index(data=["A", "B"], name="index") + e_cols = MultiIndex.from_arrays( + arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"] + ) + expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) + tm.assert_frame_equal(left=result, right=expected) + + +def test_pivot_table_values_in_index(): + data = [ + ["A", 1, 50, -1], + ["B", 1, 100, -2], + ["A", 2, 100, -2], + ["B", 2, 200, -4], + ] + df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"]) + result = df.pivot_table(values="value", index=["index", "value"], columns="col") + nan = np.nan + e_data = [ + [50.0, nan], 
+ [nan, 100.0], + [100.0, nan], + [nan, 200.0], + ] + e_index = MultiIndex.from_arrays( + arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], names=["index", "value"] + ) + e_cols = Index(data=[1, 2], name="col") + expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) + tm.assert_frame_equal(left=result, right=expected) From 454a486587f7d9acdd272766b61a4faa8ed4546a Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 12:16:55 -0500 Subject: [PATCH 02/21] modified: pandas/core/reshape/pivot.py - Added condition to :func:`__internal_pivot_table` to aggregate `values` explicitly if `values` were passed, otherwise aggregate all remaining columns. This allows the tests :func:`test_pivot_table_values_in_columns` and :func:`test_pivot_table_values_in_index` in test_pivot_multilevel.py to pass. --- pandas/core/reshape/pivot.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0a8ade581dea0..36a727dbc01cd 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -336,7 +336,11 @@ def __internal_pivot_table( values = list(values) grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna) - agged = grouped.agg(aggfunc, **kwargs) + if values_passed: + # Explicitly aggregate ``values``. + agged = grouped[values].agg(aggfunc, **kwargs) + else: + agged = grouped.agg(aggfunc, **kwargs) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): agged = agged.dropna(how="all") From babe28a81d21490f2bac3ffa24427db4d8e3e366 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 12:41:20 -0500 Subject: [PATCH 03/21] modified: pandas/tests/reshape/test_pivot.py - Added test :func:`test_pivot_table_values_as_two_params` to test that the updates in pivot.py result in expected results, satisfying GH issue #57876. 
--- pandas/tests/reshape/test_pivot.py | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 614200ae5b7c2..40d8b039ae1b6 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2554,6 +2554,38 @@ def test_pivot_table_index_and_column_keys_with_nan(self, dropna): tm.assert_frame_equal(left=result, right=expected) + @pytest.mark.parametrize( + argnames=["index", "columns"], + argvalues=[("Category", "Value"), ("Value", "Category")], + ids=["values-and-columns", "values-and-index"], + ) + def test_pivot_table_values_as_two_params(self, index, columns, request): + data = {"Category": ["A", "B", "A", "B"], "Value": [10, 20, 40, 50]} + df = DataFrame(data) + result = df.pivot_table( + index=index, columns=columns, values="Value", aggfunc="count" + ) + nan = np.nan + cat_index = Index(data=["A", "B"], name="Category") + val_index = Index(data=[10, 20, 40, 50], name="Value") + if request.node.callspec.id == "values-and-columns": + e_data = [ + [1.0, nan, 1.0, nan], + [nan, 1.0, nan, 1.0], + ] + expected = DataFrame(data=e_data, index=cat_index, columns=val_index) + + else: + e_data = [ + [1.0, nan], + [nan, 1.0], + [1.0, nan], + [nan, 1.0], + ] + expected = DataFrame(data=e_data, index=val_index, columns=cat_index) + + tm.assert_frame_equal(left=result, right=expected) + class TestPivot: def test_pivot(self): From a1f7694616197da16c005268c22d38d4ebe0f6ab Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 12:43:32 -0500 Subject: [PATCH 04/21] modified: pandas/tests/reshape/test_pivot.py - Added GH issue comment to test :func:`test_pivot_table_values_as_two_params`. 
--- pandas/tests/reshape/test_pivot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 40d8b039ae1b6..6094633899679 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2560,6 +2560,7 @@ def test_pivot_table_index_and_column_keys_with_nan(self, dropna): ids=["values-and-columns", "values-and-index"], ) def test_pivot_table_values_as_two_params(self, index, columns, request): + # GH#57876 data = {"Category": ["A", "B", "A", "B"], "Value": [10, 20, 40, 50]} df = DataFrame(data) result = df.pivot_table( From 8df5acfe9055cc7c7c7b2c08fd8d6a0686d1d269 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 12:50:32 -0500 Subject: [PATCH 05/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Combined tests :func:`test_pivot_table_values_in_columns` and :func:`test_pivot_table_values_in_index` into a single parametrized test, :func:`test_pivot_table_multiindex_values_as_two_params` to reduce duplicate setup code. 
--- pandas/tests/reshape/test_pivot_multilevel.py | 62 +++++++++---------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index b67d42b42bddd..e46f5e8a298ce 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -252,7 +252,12 @@ def test_pivot_df_multiindex_index_none(): tm.assert_frame_equal(result, expected) -def test_pivot_table_values_in_columns(): +@pytest.mark.parametrize( + argnames=["index", "columns"], + argvalues=[("index", ["col", "value"]), (["index", "value"], "col")], + ids=["values-and-columns", "values-and-index"], +) +def test_pivot_table_multiindex_values_as_two_params(index, columns, request): data = [ ["A", 1, 50, -1], ["B", 1, 100, -2], @@ -260,39 +265,30 @@ def test_pivot_table_values_in_columns(): ["B", 2, 200, -4], ] df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"]) - result = df.pivot_table(values="value", index="index", columns=["col", "value"]) + result = df.pivot_table(values="value", index=index, columns=columns) nan = np.nan - e_data = [ - [50.0, nan, 100.0, nan], - [nan, 100.0, nan, 200.0], - ] - e_index = Index(data=["A", "B"], name="index") - e_cols = MultiIndex.from_arrays( - arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"] - ) - expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) - tm.assert_frame_equal(left=result, right=expected) + if request.node.callspec.id == "values-and-columns": + e_data = [ + [50.0, nan, 100.0, nan], + [nan, 100.0, nan, 200.0], + ] + e_index = Index(data=["A", "B"], name="index") + e_cols = MultiIndex.from_arrays( + arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"] + ) + expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) + else: + e_data = [ + [50.0, nan], + [nan, 100.0], + [100.0, nan], + [nan, 200.0], + ] + e_index = MultiIndex.from_arrays( + 
arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], names=["index", "value"] + ) + e_cols = Index(data=[1, 2], name="col") + expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) -def test_pivot_table_values_in_index(): - data = [ - ["A", 1, 50, -1], - ["B", 1, 100, -2], - ["A", 2, 100, -2], - ["B", 2, 200, -4], - ] - df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"]) - result = df.pivot_table(values="value", index=["index", "value"], columns="col") - nan = np.nan - e_data = [ - [50.0, nan], - [nan, 100.0], - [100.0, nan], - [nan, 200.0], - ] - e_index = MultiIndex.from_arrays( - arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], names=["index", "value"] - ) - e_cols = Index(data=[1, 2], name="col") - expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) tm.assert_frame_equal(left=result, right=expected) From aa6dd7a2422b4c9e5c24e237a4103d57adf4b6e1 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 12:52:52 -0500 Subject: [PATCH 06/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Added GH issue #61292 as comment to test :func:`test_pivot_table_multiindex_values_as_two_params`. 
--- pandas/tests/reshape/test_pivot_multilevel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index e46f5e8a298ce..170a21d9d3850 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -258,6 +258,7 @@ def test_pivot_df_multiindex_index_none(): ids=["values-and-columns", "values-and-index"], ) def test_pivot_table_multiindex_values_as_two_params(index, columns, request): + # GH#61292 data = [ ["A", 1, 50, -1], ["B", 1, 100, -2], From b18121d458a85c7a010c14eefa83eed16abaf87a Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 12:59:02 -0500 Subject: [PATCH 07/21] modified: pandas/core/reshape/pivot.py - Simplified proposed logic in :func:`__internal_pivot_table`. --- pandas/core/reshape/pivot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 36a727dbc01cd..43ef2674c5db9 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -338,9 +338,9 @@ def __internal_pivot_table( grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna) if values_passed: # Explicitly aggregate ``values``. - agged = grouped[values].agg(aggfunc, **kwargs) - else: - agged = grouped.agg(aggfunc, **kwargs) + grouped = grouped[values] + + agged = grouped.agg(aggfunc, **kwargs) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): agged = agged.dropna(how="all") From 11e79f22c8b08eb3b822df1648e1326ec7f3bcca Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 13:06:02 -0500 Subject: [PATCH 08/21] modified: pandas/core/reshape/pivot.py - Added GH issue numbers to new logic in :func:`__internal_pivot_table`. 
--- pandas/core/reshape/pivot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 43ef2674c5db9..101cffd4604d0 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -337,6 +337,7 @@ def __internal_pivot_table( grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna) if values_passed: + # GH#57876 and GH#61292 # Explicitly aggregate ``values``. grouped = grouped[values] From de8e781bf1f94eeebe52ebb42309e5e5dbea6463 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Tue, 15 Apr 2025 15:40:52 -0500 Subject: [PATCH 09/21] modified: pandas/core/reshape/pivot.py - Added ignore-comment to silence mypy error in :func:`__internal_pivot_table`. - Added TODO-comment stating that the :meth:`DataFrameGroupBy.__getitem__` should be overloaded to match the pandas-stubs type declarations, informing mypy that the type is correct given `values` is a list. --- pandas/core/reshape/pivot.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 101cffd4604d0..e9622ac50e2eb 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -339,7 +339,12 @@ def __internal_pivot_table( if values_passed: # GH#57876 and GH#61292 # Explicitly aggregate ``values``. - grouped = grouped[values] + grouped = grouped[values] # type: ignore[assignment] + # TODO: ``grouped`` will have type ``DataFrameGroupBy`` because + # ``values`` is guaranteed to be a ``list[Any]`` per above + # logic. The type hints for ``DataFrameGroupBy`` require an + # overload for mypy to determine this. See stubs in pandas-stubs. 
+ # https://github.com/pandas-dev/pandas-stubs/blob/8434bde95460b996323cc8c0fea7b0a8bb00ea26/pandas-stubs/core/groupby/generic.pyi#L222 agged = grouped.agg(aggfunc, **kwargs) From f7a4fb529f4885b099470df5eb08c16aa4e3571e Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Wed, 16 Apr 2025 10:53:07 -0500 Subject: [PATCH 10/21] modified: doc/source/whatsnew/v3.0.0.rst - Added pivot_table bug to Bugs/Reshaping section referencing issues #57876 and #61292. --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b03b2305172a7..35b5e01c621be 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -806,6 +806,7 @@ Reshaping - Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`) - Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`) - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`) +- Bug in :meth:`DataFrame.pivot_table` incorrectly ignoring the ``values`` argument when also supplied to the ``index`` or ``columns`` parameters (:issue:`57876`, :issue:`61292`) - Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`) - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`) - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`) From 618f638a736a90069353f50813a833e1427b76b1 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Sat, 19 Apr 2025 13:24:27 -0500 Subject: [PATCH 11/21] modified: pandas/core/reshape/pivot.py - Moved and simplified mypy comment per feedback. 
--- pandas/core/reshape/pivot.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index e9622ac50e2eb..6b6903454f9b1 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -339,12 +339,8 @@ def __internal_pivot_table( if values_passed: # GH#57876 and GH#61292 # Explicitly aggregate ``values``. + # mypy is not aware `grouped[values]` will always be a DataFrameGroupBy grouped = grouped[values] # type: ignore[assignment] - # TODO: ``grouped`` will have type ``DataFrameGroupBy`` because - # ``values`` is guaranteed to be a ``list[Any]`` per above - # logic. The type hints for ``DataFrameGroupBy`` require an - # overload for mypy to determine this. See stubs in pandas-stubs. - # https://github.com/pandas-dev/pandas-stubs/blob/8434bde95460b996323cc8c0fea7b0a8bb00ea26/pandas-stubs/core/groupby/generic.pyi#L222 agged = grouped.agg(aggfunc, **kwargs) From fd1ec0408f95fbfaa2fa45a510bae5bbeaebb75d Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Sat, 19 Apr 2025 13:25:25 -0500 Subject: [PATCH 12/21] modified: pandas/core/reshape/pivot.py - Removed comment about explicit aggregation per feedback. --- pandas/core/reshape/pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 6b6903454f9b1..4e77f0a6bf5bf 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -338,7 +338,6 @@ def __internal_pivot_table( grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna) if values_passed: # GH#57876 and GH#61292 - # Explicitly aggregate ``values``. 
# mypy is not aware `grouped[values]` will always be a DataFrameGroupBy grouped = grouped[values] # type: ignore[assignment] From 1f87cc113f64d7a603ccaf23fc6f4615cf87f637 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Sat, 19 Apr 2025 13:27:39 -0500 Subject: [PATCH 13/21] modified: pandas/tests/reshape/test_pivot.py - Removed param names and updated `argnames` arg per feedback in parametrized marker. --- pandas/tests/reshape/test_pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6094633899679..9e9c1c4ee21a2 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2555,8 +2555,8 @@ def test_pivot_table_index_and_column_keys_with_nan(self, dropna): tm.assert_frame_equal(left=result, right=expected) @pytest.mark.parametrize( - argnames=["index", "columns"], - argvalues=[("Category", "Value"), ("Value", "Category")], + "index, columns", + [("Category", "Value"), ("Value", "Category")], ids=["values-and-columns", "values-and-index"], ) def test_pivot_table_values_as_two_params(self, index, columns, request): From 7b3c4038f5dc3e9fcabf61b398e298b8d8672968 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Sat, 19 Apr 2025 13:29:58 -0500 Subject: [PATCH 14/21] modified: pandas/tests/reshape/test_pivot.py - Removed param names in favor of implicit args per feedback. 
--- pandas/tests/reshape/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9e9c1c4ee21a2..9e97c8be3c239 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2585,7 +2585,7 @@ def test_pivot_table_values_as_two_params(self, index, columns, request): ] expected = DataFrame(data=e_data, index=val_index, columns=cat_index) - tm.assert_frame_equal(left=result, right=expected) + tm.assert_frame_equal(result, expected) class TestPivot: From bf877a568a4fe2986574194eae04aaa7abf9832f Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Sat, 19 Apr 2025 13:31:10 -0500 Subject: [PATCH 15/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Removed param names and updated arg for `argnames` in parametrized marker per feedback. --- pandas/tests/reshape/test_pivot_multilevel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index 170a21d9d3850..bd1a1f075db7f 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -253,8 +253,8 @@ def test_pivot_df_multiindex_index_none(): @pytest.mark.parametrize( - argnames=["index", "columns"], - argvalues=[("index", ["col", "value"]), (["index", "value"], "col")], + "index, columns", + [("index", ["col", "value"]), (["index", "value"], "col")], ids=["values-and-columns", "values-and-index"], ) def test_pivot_table_multiindex_values_as_two_params(index, columns, request): From 6bdb3deece360b0ef9893cff3371ca76b6cd8ec6 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Sat, 19 Apr 2025 13:33:08 -0500 Subject: [PATCH 16/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Reduced `expected` assignments from two to one per feedback. 
--- pandas/tests/reshape/test_pivot_multilevel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index bd1a1f075db7f..bdebc078177c3 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -277,7 +277,6 @@ def test_pivot_table_multiindex_values_as_two_params(index, columns, request): e_cols = MultiIndex.from_arrays( arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"] ) - expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) else: e_data = [ @@ -290,6 +289,6 @@ def test_pivot_table_multiindex_values_as_two_params(index, columns, request): arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], names=["index", "value"] ) e_cols = Index(data=[1, 2], name="col") - expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) + expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) tm.assert_frame_equal(left=result, right=expected) From 16c20516d28c39aa87e1a10af2b829099cfb9c53 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Sat, 19 Apr 2025 13:34:06 -0500 Subject: [PATCH 17/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Removed param names in favor of implicit args per feedback. 
--- pandas/tests/reshape/test_pivot_multilevel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index bdebc078177c3..c22607ecf120b 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -291,4 +291,4 @@ def test_pivot_table_multiindex_values_as_two_params(index, columns, request): e_cols = Index(data=[1, 2], name="col") expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) - tm.assert_frame_equal(left=result, right=expected) + tm.assert_frame_equal(result, expected) From 3824c8aab717989302c58920dc9250700e13aa57 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Wed, 23 Apr 2025 12:52:50 -0500 Subject: [PATCH 18/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Moved e_data, e_index, and e_cols to parametrized marker instead of declaring inside the test :func:`test_pivot_table_multiindex_values_as_two_params`. 
--- pandas/tests/reshape/test_pivot_multilevel.py | 59 +++++++++++-------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index c22607ecf120b..d5d9a4f88cf0b 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -253,11 +253,41 @@ def test_pivot_df_multiindex_index_none(): @pytest.mark.parametrize( - "index, columns", - [("index", ["col", "value"]), (["index", "value"], "col")], + "index, columns, e_data, e_index, e_cols", + [ + ( + "index", + ["col", "value"], + [ + [50.0, (nan := np.nan), 100.0, nan], + [nan, 100.0, nan, 200.0], + ], + Index(data=["A", "B"], name="index"), + MultiIndex.from_arrays( + arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"] + ), + ), + ( + ["index", "value"], + "col", + [ + [50.0, nan], + [nan, 100.0], + [100.0, nan], + [nan, 200.0], + ], + MultiIndex.from_arrays( + arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], + names=["index", "value"], + ), + Index(data=[1, 2], name="col"), + ), + ], ids=["values-and-columns", "values-and-index"], ) -def test_pivot_table_multiindex_values_as_two_params(index, columns, request): +def test_pivot_table_multiindex_values_as_two_params( + index, columns, e_data, e_index, e_cols +): # GH#61292 data = [ ["A", 1, 50, -1], @@ -267,28 +297,5 @@ def test_pivot_table_multiindex_values_as_two_params(index, columns, request): ] df = pd.DataFrame(data=data, columns=["index", "col", "value", "extra"]) result = df.pivot_table(values="value", index=index, columns=columns) - nan = np.nan - if request.node.callspec.id == "values-and-columns": - e_data = [ - [50.0, nan, 100.0, nan], - [nan, 100.0, nan, 200.0], - ] - e_index = Index(data=["A", "B"], name="index") - e_cols = MultiIndex.from_arrays( - arrays=[[1, 1, 2, 2], [50, 100, 100, 200]], names=["col", "value"] - ) - - else: - e_data = [ - [50.0, nan], - [nan, 100.0], - [100.0, 
nan], - [nan, 200.0], - ] - e_index = MultiIndex.from_arrays( - arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]], names=["index", "value"] - ) - e_cols = Index(data=[1, 2], name="col") - expected = pd.DataFrame(data=e_data, index=e_index, columns=e_cols) tm.assert_frame_equal(result, expected) From 9ef0b723f9a22f3142b46cdd5d1a5512378558a5 Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Wed, 23 Apr 2025 13:00:49 -0500 Subject: [PATCH 19/21] modified: pandas/tests/reshape/test_pivot.py - Moved `expected` setup to parametrized marker instead of in the test :meth:`TestPivotTable.test_pivot_table_values_as_two_params`. --- pandas/tests/reshape/test_pivot.py | 51 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9e97c8be3c239..035f3cefa8d84 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2555,36 +2555,43 @@ def test_pivot_table_index_and_column_keys_with_nan(self, dropna): tm.assert_frame_equal(left=result, right=expected) @pytest.mark.parametrize( - "index, columns", - [("Category", "Value"), ("Value", "Category")], + "index, columns, e_data, e_index, e_cols", + [ + ( + "Category", + "Value", + [ + [1.0, (nan := np.nan), 1.0, nan], + [nan, 1.0, nan, 1.0], + ], + (cat_index := Index(data=["A", "B"], name="Category")), + (val_index := Index(data=[10, 20, 40, 50], name="Value")), + ), + ( + "Value", + "Category", + [ + [1.0, nan], + [nan, 1.0], + [1.0, nan], + [nan, 1.0], + ], + val_index, + cat_index, + ), + ], ids=["values-and-columns", "values-and-index"], ) - def test_pivot_table_values_as_two_params(self, index, columns, request): + def test_pivot_table_values_as_two_params( + self, index, columns, e_data, e_index, e_cols + ): # GH#57876 data = {"Category": ["A", "B", "A", "B"], "Value": [10, 20, 40, 50]} df = DataFrame(data) result = df.pivot_table( index=index, columns=columns, values="Value", 
aggfunc="count" ) - nan = np.nan - cat_index = Index(data=["A", "B"], name="Category") - val_index = Index(data=[10, 20, 40, 50], name="Value") - if request.node.callspec.id == "values-and-columns": - e_data = [ - [1.0, nan, 1.0, nan], - [nan, 1.0, nan, 1.0], - ] - expected = DataFrame(data=e_data, index=cat_index, columns=val_index) - - else: - e_data = [ - [1.0, nan], - [nan, 1.0], - [1.0, nan], - [nan, 1.0], - ] - expected = DataFrame(data=e_data, index=val_index, columns=cat_index) - + expected = DataFrame(data=e_data, index=e_index, columns=e_cols) tm.assert_frame_equal(result, expected) From 52cf56011edfd45bf4e2b5fae206dbc041336def Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Wed, 23 Apr 2025 13:37:29 -0500 Subject: [PATCH 20/21] modified: pandas/tests/reshape/test_pivot.py - Removed walrus operator declarations in parametrized marker for test :meth:`TestPivotTable.test_pivot_table_values_as_two_params`. Appears related to this mypy issue -> https://github.com/python/mypy/issues/17377. 
--- pandas/tests/reshape/test_pivot.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 035f3cefa8d84..2a58815c1cece 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2561,23 +2561,23 @@ def test_pivot_table_index_and_column_keys_with_nan(self, dropna): "Category", "Value", [ - [1.0, (nan := np.nan), 1.0, nan], - [nan, 1.0, nan, 1.0], + [1.0, np.nan, 1.0, np.nan], + [np.nan, 1.0, np.nan, 1.0], ], - (cat_index := Index(data=["A", "B"], name="Category")), - (val_index := Index(data=[10, 20, 40, 50], name="Value")), + Index(data=["A", "B"], name="Category"), + Index(data=[10, 20, 40, 50], name="Value"), ), ( "Value", "Category", [ - [1.0, nan], - [nan, 1.0], - [1.0, nan], - [nan, 1.0], + [1.0, np.nan], + [np.nan, 1.0], + [1.0, np.nan], + [np.nan, 1.0], ], - val_index, - cat_index, + Index(data=[10, 20, 40, 50], name="Value"), + Index(data=["A", "B"], name="Category"), ), ], ids=["values-and-columns", "values-and-index"], From 2ae4921606bee6401dae80c7ed74d5e8d5f684ad Mon Sep 17 00:00:00 2001 From: Ian Thompson Date: Wed, 23 Apr 2025 13:39:19 -0500 Subject: [PATCH 21/21] modified: pandas/tests/reshape/test_pivot_multilevel.py - Removed walrus operator declarations as I'm sure mypy would raise an issue with it given that it did in test_pivot.py (see commit 52cf56011e). 
--- pandas/tests/reshape/test_pivot_multilevel.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index d5d9a4f88cf0b..af70210b37f3c 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -259,8 +259,8 @@ def test_pivot_df_multiindex_index_none(): "index", ["col", "value"], [ - [50.0, (nan := np.nan), 100.0, nan], - [nan, 100.0, nan, 200.0], + [50.0, np.nan, 100.0, np.nan], + [np.nan, 100.0, np.nan, 200.0], ], Index(data=["A", "B"], name="index"), MultiIndex.from_arrays( @@ -271,10 +271,10 @@ def test_pivot_df_multiindex_index_none(): ["index", "value"], "col", [ - [50.0, nan], - [nan, 100.0], - [100.0, nan], - [nan, 200.0], + [50.0, np.nan], + [np.nan, 100.0], + [100.0, np.nan], + [np.nan, 200.0], ], MultiIndex.from_arrays( arrays=[["A", "A", "B", "B"], [50, 100, 100, 200]],