Skip to content

Commit f7dd14b

Browse files
authored
REGR: DataFrame reduction with min_count (#41711)
1 parent db6e71b commit f7dd14b

File tree

5 files changed

+27
-13
lines changed

5 files changed

+27
-13
lines changed

doc/source/whatsnew/v1.2.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Regression in :func:`concat` between two :class:`DataFrame` objects where one has an :class:`Index` that is all-None and the other is a :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
18+
- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
1819
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
1920
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are given as a NumPy float array (:issue:`40371`)
2021

pandas/core/frame.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -9772,7 +9772,6 @@ def _reduce(
97729772
**kwds,
97739773
):
97749774

9775-
min_count = kwds.get("min_count", 0)
97769775
assert filter_type is None or filter_type == "bool", filter_type
97779776
out_dtype = "bool" if filter_type == "bool" else None
97789777

@@ -9821,7 +9820,7 @@ def _get_data() -> DataFrame:
98219820
data = self._get_bool_data()
98229821
return data
98239822

9824-
if (numeric_only is not None or axis == 0) and min_count == 0:
9823+
if numeric_only is not None or axis == 0:
98259824
# For numeric_only non-None and axis non-None, we know
98269825
# which blocks to use and no try/except is needed.
98279826
# For numeric_only=None only the case with axis==0 and no object

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ def reduce(self, func, ignore_failures: bool = False) -> list[Block]:
395395
return []
396396
raise
397397

398-
if np.ndim(result) == 0:
398+
if self.values.ndim == 1:
399399
# TODO(EA2D): special case not needed with 2D EAs
400400
res_values = np.array([[result]])
401401
else:

pandas/core/nanops.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,7 @@ def _maybe_get_mask(
245245
"""
246246
if mask is None:
247247
if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
248-
# Boolean data cannot contain nulls, so signal via mask being None
249-
return None
248+
return np.broadcast_to(False, values.shape)
250249

251250
if skipna or needs_i8_conversion(values.dtype):
252251
mask = isna(values)

pandas/tests/frame/test_reductions.py

+23-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import timedelta
22
from decimal import Decimal
3+
import re
34

45
from dateutil.tz import tzlocal
56
import numpy as np
@@ -811,35 +812,36 @@ def test_sum_corner(self):
811812
assert len(axis1) == 0
812813

813814
@pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)])
814-
def test_sum_prod_nanops(self, method, unit):
815+
@pytest.mark.parametrize("numeric_only", [None, True, False])
816+
def test_sum_prod_nanops(self, method, unit, numeric_only):
815817
idx = ["a", "b", "c"]
816818
df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]})
817819
# The default
818-
result = getattr(df, method)()
820+
result = getattr(df, method)(numeric_only=numeric_only)
819821
expected = Series([unit, unit, unit], index=idx, dtype="float64")
820822
tm.assert_series_equal(result, expected)
821823

822824
# min_count=1
823-
result = getattr(df, method)(min_count=1)
825+
result = getattr(df, method)(numeric_only=numeric_only, min_count=1)
824826
expected = Series([unit, unit, np.nan], index=idx)
825827
tm.assert_series_equal(result, expected)
826828

827829
# min_count=0
828-
result = getattr(df, method)(min_count=0)
830+
result = getattr(df, method)(numeric_only=numeric_only, min_count=0)
829831
expected = Series([unit, unit, unit], index=idx, dtype="float64")
830832
tm.assert_series_equal(result, expected)
831833

832-
result = getattr(df.iloc[1:], method)(min_count=1)
834+
result = getattr(df.iloc[1:], method)(numeric_only=numeric_only, min_count=1)
833835
expected = Series([unit, np.nan, np.nan], index=idx)
834836
tm.assert_series_equal(result, expected)
835837

836838
# min_count > 1
837839
df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5})
838-
result = getattr(df, method)(min_count=5)
840+
result = getattr(df, method)(numeric_only=numeric_only, min_count=5)
839841
expected = Series(result, index=["A", "B"])
840842
tm.assert_series_equal(result, expected)
841843

842-
result = getattr(df, method)(min_count=6)
844+
result = getattr(df, method)(numeric_only=numeric_only, min_count=6)
843845
expected = Series(result, index=["A", "B"])
844846
tm.assert_series_equal(result, expected)
845847

@@ -1685,7 +1687,7 @@ def test_minmax_extensionarray(method, numeric_only):
16851687

16861688

16871689
@pytest.mark.parametrize("meth", ["max", "min", "sum", "mean", "median"])
1688-
def test_groupy_regular_arithmetic_equivalent(meth):
1690+
def test_groupby_regular_arithmetic_equivalent(meth):
16891691
# GH#40660
16901692
df = DataFrame(
16911693
{"a": [pd.Timedelta(hours=6), pd.Timedelta(hours=7)], "b": [12.1, 13.3]}
@@ -1708,3 +1710,16 @@ def test_frame_mixed_numeric_object_with_timestamp(ts_value):
17081710
result = df.sum()
17091711
expected = Series([1, 1.1, "foo"], index=list("abc"))
17101712
tm.assert_series_equal(result, expected)
1713+
1714+
1715+
def test_prod_sum_min_count_mixed_object():
1716+
# https://github.com/pandas-dev/pandas/issues/41074
1717+
df = DataFrame([1, "a", True])
1718+
1719+
result = df.prod(axis=0, min_count=1, numeric_only=False)
1720+
expected = Series(["a"])
1721+
tm.assert_series_equal(result, expected)
1722+
1723+
msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
1724+
with pytest.raises(TypeError, match=msg):
1725+
df.sum(axis=0, min_count=1, numeric_only=False)

0 commit comments

Comments
 (0)