diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst index 790ff4c78cad6..edf23bf89d7e1 100644 --- a/doc/source/whatsnew/v1.2.4.rst +++ b/doc/source/whatsnew/v1.2.4.rst @@ -15,7 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`DataFrame.sum` where passing a ``min_count`` greater than the :class:`DataFrame` shape raised a ``ValueError`` (:issue:`39738`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0094ebc744a34..4c156d7470364 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8786,6 +8786,7 @@ def _reduce( **kwds, ): + min_count = kwds.get("min_count", 0) assert filter_type is None or filter_type == "bool", filter_type out_dtype = "bool" if filter_type == "bool" else None @@ -8830,7 +8831,7 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - if numeric_only is not None or axis == 0: + if (numeric_only is not None or axis == 0) and min_count == 0: # For numeric_only non-None and axis non-None, we know # which blocks to use and no try/except is needed. 
# For numeric_only=None only the case with axis==0 and no object @@ -8847,7 +8848,7 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce - res, indexer = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) + res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) out = df._constructor(res).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) @@ -8875,14 +8876,15 @@ def _get_data() -> DataFrame: with np.errstate(all="ignore"): result = func(values) - if filter_type == "bool" and notna(result).all(): - result = result.astype(np.bool_) - elif filter_type is None and is_object_dtype(result.dtype): - try: - result = result.astype(np.float64) - except (ValueError, TypeError): - # try to coerce to the original dtypes item by item if we can - pass + if hasattr(result, "dtype"): + if filter_type == "bool" and notna(result).all(): + result = result.astype(np.bool_) + elif filter_type is None and is_object_dtype(result.dtype): + try: + result = result.astype(np.float64) + except (ValueError, TypeError): + # try to coerce to the original dtypes item by item if we can + pass result = self._constructor_sliced(result, index=labels) return result diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 88662a4fabed8..edc1b1e96509e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import functools import itertools import operator @@ -1368,7 +1370,7 @@ def _maybe_null_out( mask: Optional[np.ndarray], shape: Tuple[int, ...], min_count: int = 1, -) -> float: +) -> Union[np.ndarray, float]: """ Returns ------- diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d843d4b0e9504..cb481613eb97f 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -835,6 +835,13 @@ def test_sum_nanops_timedelta(self): expected = 
Series([0, 0, np.nan], dtype="m8[ns]", index=idx) tm.assert_series_equal(result, expected) + def test_sum_nanops_min_count(self): + # https://github.com/pandas-dev/pandas/issues/39738 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + result = df.sum(min_count=10) + expected = Series([np.nan, np.nan], index=["x", "y"]) + tm.assert_series_equal(result, expected) + def test_sum_object(self, float_frame): values = float_frame.values.astype(int) frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns)