From 73da55e91eea0afa4a46786f09ec862361d15071 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 23:03:53 +0800 Subject: [PATCH 1/8] bug fix --- pandas/core/nanops.py | 3 ++- pandas/tests/frame/test_reductions.py | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 713d80c26ef7a..0ef613b52b4f9 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1472,7 +1472,8 @@ def _maybe_null_out( if np.iscomplexobj(result): result = result.astype("c16") else: - result = result.astype("f8") + if not is_float_dtype(result): + result = result.astype("f8") result[null_mask] = np.nan else: # GH12941, use None to auto cast null diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7f2a13862f4ed..f1ce8e837a075 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -777,6 +777,12 @@ def test_sum_nanops_min_count(self): result = df.sum(min_count=10) expected = Series([np.nan, np.nan], index=["x", "y"]) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("kwargs", [{"axis":1, "min_count":1}, {"axis":1, "min_count":2}, {"axis":1, "skipna":False}]) + def test_sum_nanops_dtype_min_count(self, kwargs): + df = pd.DataFrame({'a': [1., 2.3, 4.4], 'b': [2.2, 3, np.nan]}, dtype='float32') + result = df.sum(**kwargs).dtype + assert result == 'float32' def test_sum_object(self, float_frame): values = float_frame.values.astype(int) From 23c19b872a69bef95170707f1f1a42636ba56a75 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 23:06:01 +0800 Subject: [PATCH 2/8] bug fix --- pandas/tests/frame/test_reductions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index f1ce8e837a075..ba27a7d3432e3 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -780,6 +780,7 @@ def test_sum_nanops_min_count(self): @pytest.mark.parametrize("kwargs", [{"axis":1, "min_count":1}, {"axis":1, "min_count":2}, {"axis":1, "skipna":False}]) def test_sum_nanops_dtype_min_count(self, kwargs): + df = pd.DataFrame({'a': [1., 2.3, 4.4], 'b': [2.2, 3, np.nan]}, dtype='float32') result = df.sum(**kwargs).dtype assert result == 'float32' From 8177bf98a21da6208eba61169f74b6740b94ab55 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 23:08:23 +0800 Subject: [PATCH 3/8] bug fix --- pandas/tests/frame/test_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index ba27a7d3432e3..9c127cfb4ee01 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -780,7 +780,7 @@ def test_sum_nanops_min_count(self): @pytest.mark.parametrize("kwargs", [{"axis":1, "min_count":1}, {"axis":1, "min_count":2}, {"axis":1, "skipna":False}]) def test_sum_nanops_dtype_min_count(self, kwargs): - + # GH#46947 df = pd.DataFrame({'a': [1., 2.3, 4.4], 'b': [2.2, 3, np.nan]}, dtype='float32') result = df.sum(**kwargs).dtype assert result == 'float32' From b1828fea42b5b36b002ef0eba35bbf5a731fcc82 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 23:08:55 +0800 Subject: [PATCH 4/8] bug fix --- pandas/tests/frame/test_reductions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 9c127cfb4ee01..4a9d77a427e1f 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -780,6 +780,7 @@ def test_sum_nanops_min_count(self): @pytest.mark.parametrize("kwargs", [{"axis":1, "min_count":1}, {"axis":1, "min_count":2}, {"axis":1, "skipna":False}]) def test_sum_nanops_dtype_min_count(self, kwargs): + # GH#46947 df = pd.DataFrame({'a': [1., 2.3, 4.4], 'b': [2.2, 3, np.nan]}, dtype='float32') result = df.sum(**kwargs).dtype From b5590c41f749cfc3946041dfd9a9cb64606def13 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 23:10:21 +0800 Subject: [PATCH 5/8] pre-commit --- pandas/tests/frame/test_reductions.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 4a9d77a427e1f..57045951717fa 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -777,14 +777,20 @@ def test_sum_nanops_min_count(self): result = df.sum(min_count=10) expected = Series([np.nan, np.nan], index=["x", "y"]) tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("kwargs", [{"axis":1, "min_count":1}, {"axis":1, "min_count":2}, {"axis":1, "skipna":False}]) + + @pytest.mark.parametrize( + "kwargs", + [ + {"axis": 1, "min_count": 1}, + {"axis": 1, "min_count": 2}, + {"axis": 1, "skipna": False}, + ], + ) def test_sum_nanops_dtype_min_count(self, kwargs): - # GH#46947 - df = pd.DataFrame({'a': [1., 2.3, 4.4], 'b': [2.2, 3, np.nan]}, dtype='float32') + df = DataFrame({"a": [1.0, 2.3, 4.4], "b": [2.2, 3, np.nan]}, dtype="float32") result = df.sum(**kwargs).dtype - assert result == 'float32' + assert result == "float32" def test_sum_object(self, float_frame): values = float_frame.values.astype(int) From 273f17ab3cd67d9fe46ebadf3bc4e549b964c835 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 22 May 2022 23:13:24 +0800 Subject: [PATCH 6/8] add rst --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b23dd5c2f05a6..b9c992b898ce9 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -713,7 +713,7 @@ Indexing - Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) - Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) - Bug in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` not always propagating metadata (:issue:`28283`) -- +- Bug in :meth:`_maybe_null_out` pd.DataFrame.sum with min_count changes dtype if result contains NaNs Missing ^^^^^^^ From 070a963dd2921a3ba41f03a8efa9c433e5c09a44 Mon Sep 17 00:00:00 2001 From: weikhor Date: Sun, 29 May 2022 22:33:23 +0800 Subject: [PATCH 7/8] update --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/nanops.py | 5 ++--- pandas/tests/frame/test_reductions.py | 19 +++++++++++-------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index cf67a79a5de5d..ebb4708ada45b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -780,7 +780,7 @@ Indexing - Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) - Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) - Bug in :meth:`NDFrame.xs`, :meth:`DataFrame.iterrows`, :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` not always propagating metadata (:issue:`28283`) -- Bug in :meth:`_maybe_null_out` pd.DataFrame.sum with min_count changes dtype if result contains NaNs +- Bug in :meth:`DataFrame.sum` min_count changes dtype if input contains NaNs (:issue:`46947`) Missing ^^^^^^^ diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0ef613b52b4f9..a96fb9c8129dd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1471,9 +1471,8 @@ def _maybe_null_out( if is_numeric_dtype(result): if np.iscomplexobj(result): result = result.astype("c16") - else: - if not is_float_dtype(result): - result = result.astype("f8") + elif not is_float_dtype(result): + result = result.astype("f8", copy=False) result[null_mask] = np.nan else: # GH12941, use None to auto cast null diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 700321940689e..702faf2f480e8 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -778,19 +778,22 @@ def test_sum_nanops_min_count(self): expected = Series([np.nan, np.nan], index=["x", "y"]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("float_type", ["float16", "float32", "float64"]) @pytest.mark.parametrize( - "kwargs", + "kwargs, expected_result", [ - {"axis": 1, "min_count": 1}, - {"axis": 1, "min_count": 2}, - {"axis": 1, "skipna": False}, + ({"axis": 1, "min_count": 2}, [3.2, 5.3, np.NaN]), + ({"axis": 1, "min_count": 3}, [np.NaN, np.NaN, np.NaN]), + ({"axis": 1, "skipna": False}, [3.2, 5.3, np.NaN]), ], ) - def test_sum_nanops_dtype_min_count(self, kwargs): + def test_sum_nanops_dtype_min_count(self, float_type, kwargs, expected_result): # GH#46947 - df = DataFrame({"a": [1.0, 2.3, 4.4], "b": [2.2, 3, np.nan]}, dtype="float32") - result = df.sum(**kwargs).dtype - assert result == "float32" + # pass + df = DataFrame({"a": [1.0, 2.3, 4.4], "b": [2.2, 3, np.nan]}, dtype=float_type) + result = df.sum(**kwargs) + expected = Series(expected_result).astype(float_type) + tm.assert_series_equal(result, expected) def test_sum_object(self, float_frame): values = float_frame.values.astype(int) From d1e108bcb36a7c5c0d7963dcf9c41a3dc1694685 Mon Sep 17 00:00:00 2001 From: weikhor Date: Mon, 30 May 2022 20:10:41 +0800 Subject: [PATCH 8/8] add test for prob --- pandas/tests/frame/test_reductions.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 702faf2f480e8..b4d3d1ae548b5 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -789,12 +789,29 @@ def test_sum_nanops_min_count(self): ) def test_sum_nanops_dtype_min_count(self, float_type, kwargs, expected_result): # GH#46947 - # pass df = DataFrame({"a": [1.0, 2.3, 4.4], "b": [2.2, 3, np.nan]}, dtype=float_type) result = df.sum(**kwargs) expected = Series(expected_result).astype(float_type) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("float_type", ["float16", "float32", "float64"]) + @pytest.mark.parametrize( + "kwargs, expected_result", + [ + ({"axis": 1, "min_count": 2}, [2.0, 4.0, np.NaN]), + ({"axis": 1, "min_count": 3}, [np.NaN, np.NaN, np.NaN]), + ({"axis": 1, "skipna": False}, [2.0, 4.0, np.NaN]), + ], + ) + def test_prod_nanops_dtype_min_count(self, float_type, kwargs, expected_result): + # GH#46947 + df = DataFrame( + {"a": [1.0, 2.0, 4.4], "b": [2.0, 2.0, np.nan]}, dtype=float_type + ) + result = df.prod(**kwargs) + expected = Series(expected_result).astype(float_type) + tm.assert_series_equal(result, expected) + def test_sum_object(self, float_frame): values = float_frame.values.astype(int) frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns)