Skip to content

Commit 59fe44e

Browse files
Backport PR #40143 on branch 1.2.x (REGR: reduction operations failing if min_count is larger) (#40237)
Co-authored-by: Simon Hawkins <[email protected]>
1 parent 738f3a0 commit 59fe44e

File tree

4 files changed

+23
-12
lines changed

4 files changed

+23
-12
lines changed

doc/source/whatsnew/v1.2.4.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717

18-
-
18+
- Fixed regression in :meth:`DataFrame.sum` where passing a ``min_count`` greater than the :class:`DataFrame` shape resulted in a ``ValueError`` (:issue:`39738`)
1919
-
2020

2121
.. ---------------------------------------------------------------------------

pandas/core/frame.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -8786,6 +8786,7 @@ def _reduce(
87868786
**kwds,
87878787
):
87888788

8789+
min_count = kwds.get("min_count", 0)
87898790
assert filter_type is None or filter_type == "bool", filter_type
87908791
out_dtype = "bool" if filter_type == "bool" else None
87918792

@@ -8830,7 +8831,7 @@ def _get_data() -> DataFrame:
88308831
data = self._get_bool_data()
88318832
return data
88328833

8833-
if numeric_only is not None or axis == 0:
8834+
if (numeric_only is not None or axis == 0) and min_count == 0:
88348835
# For numeric_only non-None and axis non-None, we know
88358836
# which blocks to use and no try/except is needed.
88368837
# For numeric_only=None only the case with axis==0 and no object
@@ -8847,7 +8848,7 @@ def _get_data() -> DataFrame:
88478848

88488849
# After possibly _get_data and transposing, we are now in the
88498850
# simple case where we can use BlockManager.reduce
8850-
res, indexer = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
8851+
res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures)
88518852
out = df._constructor(res).iloc[0]
88528853
if out_dtype is not None:
88538854
out = out.astype(out_dtype)
@@ -8875,14 +8876,15 @@ def _get_data() -> DataFrame:
88758876
with np.errstate(all="ignore"):
88768877
result = func(values)
88778878

8878-
if filter_type == "bool" and notna(result).all():
8879-
result = result.astype(np.bool_)
8880-
elif filter_type is None and is_object_dtype(result.dtype):
8881-
try:
8882-
result = result.astype(np.float64)
8883-
except (ValueError, TypeError):
8884-
# try to coerce to the original dtypes item by item if we can
8885-
pass
8879+
if hasattr(result, "dtype"):
8880+
if filter_type == "bool" and notna(result).all():
8881+
result = result.astype(np.bool_)
8882+
elif filter_type is None and is_object_dtype(result.dtype):
8883+
try:
8884+
result = result.astype(np.float64)
8885+
except (ValueError, TypeError):
8886+
# try to coerce to the original dtypes item by item if we can
8887+
pass
88868888

88878889
result = self._constructor_sliced(result, index=labels)
88888890
return result

pandas/core/nanops.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
import functools
24
import itertools
35
import operator
@@ -1368,7 +1370,7 @@ def _maybe_null_out(
13681370
mask: Optional[np.ndarray],
13691371
shape: Tuple[int, ...],
13701372
min_count: int = 1,
1371-
) -> float:
1373+
) -> Union[np.ndarray, float]:
13721374
"""
13731375
Returns
13741376
-------

pandas/tests/frame/test_reductions.py

+7
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,13 @@ def test_sum_nanops_timedelta(self):
835835
expected = Series([0, 0, np.nan], dtype="m8[ns]", index=idx)
836836
tm.assert_series_equal(result, expected)
837837

838+
def test_sum_nanops_min_count(self):
839+
# https://github.com/pandas-dev/pandas/issues/39738
840+
df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
841+
result = df.sum(min_count=10)
842+
expected = Series([np.nan, np.nan], index=["x", "y"])
843+
tm.assert_series_equal(result, expected)
844+
838845
def test_sum_object(self, float_frame):
839846
values = float_frame.values.astype(int)
840847
frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns)

0 commit comments

Comments
 (0)