From f9bb63bbe85cf84a40fb1ce35129483e1cf2753e Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Sun, 1 Sep 2019 17:29:40 +0200 Subject: [PATCH 01/11] Fixes #24893: use maybe_downcast_to_dtype in margins --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d653dd87308cf..5514510b76e84 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -254,7 +254,7 @@ def _add_margins( try: for dtype in set(result.dtypes): cols = result.select_dtypes([dtype]).columns - margin_dummy[cols] = margin_dummy[cols].astype(dtype) + margin_dummy[cols] = maybe_downcast_to_dtype(margin_dummy[cols], dtype) result = result.append(margin_dummy) except TypeError: From 7f2c8df0f21a284c0123c152b501a5d3a76310eb Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Sun, 1 Sep 2019 19:01:36 +0200 Subject: [PATCH 02/11] Fixes #24893: use .apply for maybe_downcast_to_dtype in margins --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 5514510b76e84..2abcd08c460b1 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -254,7 +254,7 @@ def _add_margins( try: for dtype in set(result.dtypes): cols = result.select_dtypes([dtype]).columns - margin_dummy[cols] = maybe_downcast_to_dtype(margin_dummy[cols], dtype) + margin_dummy[cols] = margin_dummy[cols].apply(maybe_downcast_to_dtype, args=(dtype,)) result = result.append(margin_dummy) except TypeError: From b81149ba8b01be0db38044e890e06b411f596b38 Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Sun, 1 Sep 2019 19:12:22 +0200 Subject: [PATCH 03/11] Fixes #24893: flake8 --- pandas/core/reshape/pivot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 2abcd08c460b1..26d7165f35762 100644 --- 
a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -254,7 +254,8 @@ def _add_margins( try: for dtype in set(result.dtypes): cols = result.select_dtypes([dtype]).columns - margin_dummy[cols] = margin_dummy[cols].apply(maybe_downcast_to_dtype, args=(dtype,)) + margin_dummy[cols] = margin_dummy[cols].apply(maybe_downcast_to_dtype, + args=(dtype,)) result = result.append(margin_dummy) except TypeError: From 2d39e86ed00ab19ca4ee3d549b864d740727e14c Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Tue, 3 Sep 2019 13:45:20 +0200 Subject: [PATCH 04/11] Fixes #24893: added test and run black --- pandas/core/reshape/pivot.py | 5 +++-- pandas/tests/reshape/test_pivot.py | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 26d7165f35762..a314806de58ad 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -254,8 +254,9 @@ def _add_margins( try: for dtype in set(result.dtypes): cols = result.select_dtypes([dtype]).columns - margin_dummy[cols] = margin_dummy[cols].apply(maybe_downcast_to_dtype, - args=(dtype,)) + margin_dummy[cols] = margin_dummy[cols].apply( + maybe_downcast_to_dtype, args=(dtype,) + ) result = result.append(margin_dummy) except TypeError: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 03b15d2df1a26..afa59256a39ac 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1601,7 +1601,6 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) - @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins(self, observed): # GH 10989 df = pd.DataFrame( @@ -1615,6 +1614,24 @@ def test_categorical_margins(self, observed): table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) 
tm.assert_frame_equal(table, expected) + def test_margins_casted_to_float(self): + # GH #24893 + df = pd.DataFrame( + { + "A": [2, 4, 6, 8], + "B": [1, 4, 5, 8], + "C": [1, 3, 4, 6], + "D": ["X", "X", "Y", "Y"], + } + ) + + result = pd.pivot_table(df, index="D", margins=True) + expected = pd.DataFrame( + {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, + index=pd.Index(["X", "Y", "All"], name="D"), + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins_category(self, observed): df = pd.DataFrame( From 537354c74f3992d4f5cbf79e457f1d60e077b985 Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Tue, 3 Sep 2019 14:47:20 +0200 Subject: [PATCH 05/11] Fixes #24893: re-added line --- pandas/tests/reshape/test_pivot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index afa59256a39ac..88e6393a42ee2 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1601,6 +1601,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) + @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins(self, observed): # GH 10989 df = pd.DataFrame( From 578c5e08fc38bb73d262986eccead6900f770b56 Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Wed, 4 Sep 2019 16:28:10 +0200 Subject: [PATCH 06/11] Fixes #24893: updated tests --- pandas/tests/reshape/test_pivot.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 88e6393a42ee2..5e0d6f2503c55 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1601,21 +1601,22 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): 
expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) - @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") + def test_categorical_margins(self, observed): # GH 10989 df = pd.DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} ) - expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected = pd.DataFrame([[1, 2, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) expected.index = Index([0, 1, "All"], name="y") expected.columns = Index([0, 1, "All"], name="z") table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) tm.assert_frame_equal(table, expected) - def test_margins_casted_to_float(self): + + def test_margins_casted_to_float(self, observed): # GH #24893 df = pd.DataFrame( { @@ -1631,21 +1632,22 @@ def test_margins_casted_to_float(self): {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, index=pd.Index(["X", "Y", "All"], name="D"), ) + table = result tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins_category(self, observed): df = pd.DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} ) - expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected = pd.DataFrame([[1, 2, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) expected.index = Index([0, 1, "All"], name="y") expected.columns = Index([0, 1, "All"], name="z") df.y = df.y.astype("category") df.z = df.z.astype("category") table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) + tm.assert_frame_equal(table, expected) def test_categorical_aggfunc(self, observed): From 03529301810a38fe9ad74522c1aea9f8db98ee54 Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Thu, 5 Sep 2019 17:33:34 +0200 Subject: [PATCH 07/11] Fixes #24893: added comment in pivot --- pandas/core/reshape/pivot.py | 1 + pandas/tests/reshape/merge/test_pivot_old.py | 0 2 files changed, 
1 insertion(+) create mode 100644 pandas/tests/reshape/merge/test_pivot_old.py diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index a314806de58ad..d97cceb2cd799 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -252,6 +252,7 @@ def _add_margins( row_names = result.index.names try: + # downcast the margins to the result's column dtypes where possible, leaving floats otherwise for dtype in set(result.dtypes): cols = result.select_dtypes([dtype]).columns margin_dummy[cols] = margin_dummy[cols].apply( diff --git a/pandas/tests/reshape/merge/test_pivot_old.py b/pandas/tests/reshape/merge/test_pivot_old.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 8460d14d2fb898c79dcd80f94596de7bc06ae7a8 Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Thu, 5 Sep 2019 17:39:44 +0200 Subject: [PATCH 08/11] Fixes #24893: updated issue notice --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3b6288146bdf2..6b33e60c67bbe 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -186,6 +186,7 @@ Reshaping - - +- Bug in :meth:`DataFrame.pivot_table` not returning the correct ``float`` dtype when ``margins=True`` and ``aggfunc='mean'`` (:issue:`24893`) Sparse ^^^^^^ From d0a04f88864dda1a90ec97c26124e01d75baa240 Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Thu, 5 Sep 2019 22:01:06 +0200 Subject: [PATCH 09/11] Fixes #24893: re-added lines --- pandas/tests/reshape/test_pivot.py | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 5e0d6f2503c55..4d53572abf8e5 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1601,53 +1601,33 @@ def test_pivot_table_margins_name_with_aggfunc_list(self): expected = pd.DataFrame(table.values, index=ix, columns=cols) tm.assert_frame_equal(table, expected) - + 
@pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins(self, observed): # GH 10989 df = pd.DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} ) - expected = pd.DataFrame([[1, 2, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) expected.index = Index([0, 1, "All"], name="y") expected.columns = Index([0, 1, "All"], name="z") table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) tm.assert_frame_equal(table, expected) - - def test_margins_casted_to_float(self, observed): - # GH #24893 - df = pd.DataFrame( - { - "A": [2, 4, 6, 8], - "B": [1, 4, 5, 8], - "C": [1, 3, 4, 6], - "D": ["X", "X", "Y", "Y"], - } - ) - - result = pd.pivot_table(df, index="D", margins=True) - expected = pd.DataFrame( - {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, - index=pd.Index(["X", "Y", "All"], name="D"), - ) - table = result - tm.assert_frame_equal(result, expected) - + @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") def test_categorical_margins_category(self, observed): df = pd.DataFrame( {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} ) - expected = pd.DataFrame([[1, 2, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) expected.index = Index([0, 1, "All"], name="y") expected.columns = Index([0, 1, "All"], name="z") df.y = df.y.astype("category") df.z = df.z.astype("category") table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) - tm.assert_frame_equal(table, expected) def test_categorical_aggfunc(self, observed): @@ -2547,4 +2527,4 @@ def test_margin_normalize(self): codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], names=["A", "B"], ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) \ No newline at end of file From 45ffc77b5af813b973e97a4e3f0a5186eae0db15 Mon Sep 17 00:00:00 2001 
From: Mabel Villalba Date: Thu, 5 Sep 2019 22:02:55 +0200 Subject: [PATCH 10/11] Fixes #24893: re-added test --- pandas/tests/reshape/test_pivot.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 4d53572abf8e5..578d766a51ab0 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1630,6 +1630,25 @@ def test_categorical_margins_category(self, observed): table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) tm.assert_frame_equal(table, expected) + def test_margins_casted_to_float(self, observed): + # GH #24893 + df = pd.DataFrame( + { + "A": [2, 4, 6, 8], + "B": [1, 4, 5, 8], + "C": [1, 3, 4, 6], + "D": ["X", "X", "Y", "Y"], + } + ) + + result = pd.pivot_table(df, index="D", margins=True) + expected = pd.DataFrame( + {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, + index=pd.Index(["X", "Y", "All"], name="D"), + ) + table = result + tm.assert_frame_equal(result, expected) + def test_categorical_aggfunc(self, observed): # GH 9534 df = pd.DataFrame( @@ -2527,4 +2546,4 @@ def test_margin_normalize(self): codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], names=["A", "B"], ) - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) From 414fb3aebf04cd3cf74112ce66c3d1f70776b7c2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 11 Sep 2019 15:09:04 -0500 Subject: [PATCH 11/11] fixup --- pandas/tests/reshape/test_pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index ed16c9300c60d..983aa1f38c839 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1672,7 +1672,6 @@ def test_margins_casted_to_float(self, observed): {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, index=pd.Index(["X", "Y", "All"], name="D"), ) - table = 
result tm.assert_frame_equal(result, expected) def test_categorical_aggfunc(self, observed):