From 9cbfe93c50d55ebbbf93193ed1dc3389ce4e4f8d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 28 May 2021 16:00:58 -0700 Subject: [PATCH 1/2] REGR: DataFrame reduction with min_count --- pandas/core/frame.py | 3 +-- pandas/core/internals/blocks.py | 2 +- pandas/core/nanops.py | 3 +-- pandas/tests/frame/test_reductions.py | 31 ++++++++++++++++++++------- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6e71cb49596c8..bab4b6d3e2190 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9765,7 +9765,6 @@ def _reduce( **kwds, ): - min_count = kwds.get("min_count", 0) assert filter_type is None or filter_type == "bool", filter_type out_dtype = "bool" if filter_type == "bool" else None @@ -9814,7 +9813,7 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - if (numeric_only is not None or axis == 0) and min_count == 0: + if numeric_only is not None or axis == 0: # For numeric_only non-None and axis non-None, we know # which blocks to use and no try/except is needed. # For numeric_only=None only the case with axis==0 and no object diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4f1b16e747394..5b8130c293263 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -393,7 +393,7 @@ def reduce(self, func, ignore_failures: bool = False) -> list[Block]: return [] raise - if np.ndim(result) == 0: + if self.values.ndim == 1: # TODO(EA2D): special case not needed with 2D EAs res_values = np.array([[result]]) else: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b8909f16ee876..673c482bced18 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -245,8 +245,7 @@ def _maybe_get_mask( """ if mask is None: if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype): - # Boolean data cannot contain nulls, so signal via mask being None - return None + return np.broadcast_to(False, values.shape) if skipna or needs_i8_conversion(values.dtype): mask = isna(values) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 564f5d20b0301..9d778cdee6a5b 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1,5 +1,6 @@ from datetime import timedelta from decimal import Decimal +import re from dateutil.tz import tzlocal import numpy as np @@ -811,35 +812,36 @@ def test_sum_corner(self): assert len(axis1) == 0 @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)]) - def test_sum_prod_nanops(self, method, unit): + @pytest.mark.parametrize("numeric_only", [None, True, False]) + def test_sum_prod_nanops(self, method, unit, numeric_only): idx = ["a", "b", "c"] df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]}) # The default - result = getattr(df, method)() + result = getattr(df, method)(numeric_only=numeric_only) expected = Series([unit, unit, unit], index=idx, dtype="float64") tm.assert_series_equal(result, expected) # min_count=1 - result = getattr(df, method)(min_count=1) + result = getattr(df, method)(numeric_only=numeric_only, min_count=1) expected = Series([unit, unit, np.nan], index=idx) tm.assert_series_equal(result, expected) # min_count=0 - result = getattr(df, method)(min_count=0) + result = getattr(df, method)(numeric_only=numeric_only, min_count=0) expected = Series([unit, unit, unit], index=idx, dtype="float64") tm.assert_series_equal(result, expected) - result = getattr(df.iloc[1:], method)(min_count=1) + result = getattr(df.iloc[1:], method)(numeric_only=numeric_only, min_count=1) expected = Series([unit, np.nan, np.nan], index=idx) tm.assert_series_equal(result, expected) # min_count > 1 df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) - result = getattr(df, method)(min_count=5) + result = getattr(df, method)(numeric_only=numeric_only, min_count=5) expected = Series(result, index=["A", "B"]) tm.assert_series_equal(result, expected) - result = getattr(df, method)(min_count=6) + result = getattr(df, method)(numeric_only=numeric_only, min_count=6) expected = Series(result, index=["A", "B"]) tm.assert_series_equal(result, expected) @@ -1685,7 +1687,7 @@ def test_minmax_extensionarray(method, numeric_only): @pytest.mark.parametrize("meth", ["max", "min", "sum", "mean", "median"]) -def test_groupy_regular_arithmetic_equivalent(meth): +def test_groupby_regular_arithmetic_equivalent(meth): # GH#40660 df = DataFrame( {"a": [pd.Timedelta(hours=6), pd.Timedelta(hours=7)], "b": [12.1, 13.3]} @@ -1708,3 +1710,16 @@ def test_frame_mixed_numeric_object_with_timestamp(ts_value): result = df.sum() expected = Series([1, 1.1, "foo"], index=list("abc")) tm.assert_series_equal(result, expected) + + +def test_prod_sum_min_count_mixed_object(): + # https://github.com/pandas-dev/pandas/issues/41074 + df = DataFrame([1, "a", True]) + + result = df.prod(axis=0, min_count=1, numeric_only=False) + expected = Series(["a"]) + tm.assert_series_equal(result, expected) + + msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'") + with pytest.raises(TypeError, match=msg): + df.sum(axis=0, min_count=1, numeric_only=False) From 30e8230246e475684781ca396c9e18408a7b2340 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 May 2021 11:19:30 -0700 Subject: [PATCH 2/2] copy whatsnew from 41701 --- doc/source/whatsnew/v1.2.5.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.5.rst b/doc/source/whatsnew/v1.2.5.rst index 60e146b2212eb..1d7b7a762e2ae 100644 --- a/doc/source/whatsnew/v1.2.5.rst +++ b/doc/source/whatsnew/v1.2.5.rst @@ -15,6 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :func:`concat` between two :class:`DataFrames` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`) +- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`) - Regression in :func:`read_csv` when using ``memory_map=True`` with an non-UTF8 encoding (:issue:`40986`) -