Skip to content

Commit 404a3c7

Browse files
jbrockmendel authored and simonjayhawkins committed
Backport PR pandas-dev#41711: REGR: DataFrame reduction with min_count
1 parent 7bc9992 commit 404a3c7

File tree

5 files changed

+26
-12
lines changed

5 files changed

+26
-12
lines changed

doc/source/whatsnew/v1.2.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Regression in :func:`concat` between two :class:`DataFrame` objects where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
18+
- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
1819
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
1920
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are given as a NumPy float array (:issue:`40371`)
2021

pandas/core/frame.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -8786,7 +8786,6 @@ def _reduce(
87868786
**kwds,
87878787
):
87888788

8789-
min_count = kwds.get("min_count", 0)
87908789
assert filter_type is None or filter_type == "bool", filter_type
87918790
out_dtype = "bool" if filter_type == "bool" else None
87928791

@@ -8831,7 +8830,7 @@ def _get_data() -> DataFrame:
88318830
data = self._get_bool_data()
88328831
return data
88338832

8834-
if (numeric_only is not None or axis == 0) and min_count == 0:
8833+
if numeric_only is not None or axis == 0:
88358834
# For numeric_only non-None and axis non-None, we know
88368835
# which blocks to use and no try/except is needed.
88378836
# For numeric_only=None only the case with axis==0 and no object

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ def reduce(self, func, ignore_failures: bool = False) -> List["Block"]:
391391
return []
392392
raise
393393

394-
if np.ndim(result) == 0:
394+
if self.values.ndim == 1:
395395
# TODO(EA2D): special case not needed with 2D EAs
396396
res_values = np.array([[result]])
397397
else:

pandas/core/nanops.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,7 @@ def _maybe_get_mask(
231231
"""
232232
if mask is None:
233233
if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
234-
# Boolean data cannot contain nulls, so signal via mask being None
235-
return None
234+
return np.broadcast_to(False, values.shape)
236235

237236
if skipna or needs_i8_conversion(values.dtype):
238237
mask = isna(values)

pandas/tests/frame/test_reductions.py

+22-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import timedelta
22
from decimal import Decimal
3+
import re
34

45
from dateutil.tz import tzlocal
56
import numpy as np
@@ -783,34 +784,35 @@ def test_sum_corner(self):
783784
assert len(axis1) == 0
784785

785786
@pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)])
786-
def test_sum_prod_nanops(self, method, unit):
787+
@pytest.mark.parametrize("numeric_only", [None, True, False])
788+
def test_sum_prod_nanops(self, method, unit, numeric_only):
787789
idx = ["a", "b", "c"]
788790
df = DataFrame({"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]})
789791
# The default
790-
result = getattr(df, method)
792+
result = getattr(df, method)(numeric_only=numeric_only)
791793
expected = Series([unit, unit, unit], index=idx, dtype="float64")
792794

793795
# min_count=1
794-
result = getattr(df, method)(min_count=1)
796+
result = getattr(df, method)(numeric_only=numeric_only, min_count=1)
795797
expected = Series([unit, unit, np.nan], index=idx)
796798
tm.assert_series_equal(result, expected)
797799

798800
# min_count=0
799-
result = getattr(df, method)(min_count=0)
801+
result = getattr(df, method)(numeric_only=numeric_only, min_count=0)
800802
expected = Series([unit, unit, unit], index=idx, dtype="float64")
801803
tm.assert_series_equal(result, expected)
802804

803-
result = getattr(df.iloc[1:], method)(min_count=1)
805+
result = getattr(df.iloc[1:], method)(numeric_only=numeric_only, min_count=1)
804806
expected = Series([unit, np.nan, np.nan], index=idx)
805807
tm.assert_series_equal(result, expected)
806808

807809
# min_count > 1
808810
df = DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5})
809-
result = getattr(df, method)(min_count=5)
811+
result = getattr(df, method)(numeric_only=numeric_only, min_count=5)
810812
expected = Series(result, index=["A", "B"])
811813
tm.assert_series_equal(result, expected)
812814

813-
result = getattr(df, method)(min_count=6)
815+
result = getattr(df, method)(numeric_only=numeric_only, min_count=6)
814816
expected = Series(result, index=["A", "B"])
815817
tm.assert_series_equal(result, expected)
816818

@@ -1491,3 +1493,16 @@ def test_minmax_extensionarray(method, numeric_only):
14911493
[getattr(int64_info, method)], index=Index(["Int64"], dtype="object")
14921494
)
14931495
tm.assert_series_equal(result, expected)
1496+
1497+
1498+
def test_prod_sum_min_count_mixed_object():
1499+
# https://github.com/pandas-dev/pandas/issues/41074
1500+
df = DataFrame([1, "a", True])
1501+
1502+
result = df.prod(axis=0, min_count=1, numeric_only=False)
1503+
expected = Series(["a"])
1504+
tm.assert_series_equal(result, expected)
1505+
1506+
msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
1507+
with pytest.raises(TypeError, match=msg):
1508+
df.sum(axis=0, min_count=1, numeric_only=False)

0 commit comments

Comments
 (0)