diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1c1415255bf89..b230c009472d7 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -928,7 +928,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`) - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`, :issue:`23316`) -- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`) +- NumPy ufuncs ``np.minimum.reduce``, ``np.maximum.reduce``, ``np.add.reduce``, and ``np.multiply.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`, :issue:`44793`) - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) - Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`) - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). 
This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`) diff --git a/pandas/conftest.py b/pandas/conftest.py index be28dbe35fcb2..7b8fa00f8aed6 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -404,6 +404,18 @@ def index_or_series_or_array(request): return request.param +@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__) +def box_with_array(request): + """ + Fixture to test behavior for Index, Series, DataFrame, and pandas Array + classes + """ + return request.param + + +box_with_array2 = box_with_array + + @pytest.fixture def dict_subclass(): """ diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 7fa231846e721..94116c5db7f11 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -25,6 +25,8 @@ REDUCTION_ALIASES = { "maximum": "max", "minimum": "min", + "add": "sum", + "multiply": "prod", } diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index b9500924159af..782fad435c1c5 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -419,9 +419,6 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # For MaskedArray inputs, we apply the ufunc to ._data # and mask the result. - if method == "reduce" and ufunc not in [np.maximum, np.minimum]: - # Not clear how to handle missing values in reductions. Raise. - raise NotImplementedError("The 'reduce' method is not supported.") out = kwargs.get("out", ()) @@ -482,6 +479,11 @@ def reconstruct(x): if ufunc.nout > 1: # e.g. np.divmod return tuple(reconstruct(x) for x in result) + elif method == "reduce": + # e.g. 
np.add.reduce; test_ufunc_reduce_raises + if self._mask.any(): + return self._na_value + return result else: return reconstruct(result) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index 49a6e442f890f..e847f31cd3f9c 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -221,17 +221,6 @@ def mismatched_freq(request): # ------------------------------------------------------------------ -@pytest.fixture( - params=[pd.Index, pd.Series, pd.DataFrame, pd.array], ids=lambda x: x.__name__ -) -def box_with_array(request): - """ - Fixture to test behavior for Index, Series, DataFrame, and pandas Array - classes - """ - return request.param - - @pytest.fixture( params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__ ) @@ -241,7 +230,3 @@ def box_1d_array(request): classes """ return request.param - - -# alias so we can use the same fixture for multiple parameters in a test -box_with_array2 = box_with_array diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index db362afc80087..78992f3124779 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -79,10 +79,14 @@ def test_ufunc_numeric(): @pytest.mark.parametrize("values", [[True, False], [True, None]]) def test_ufunc_reduce_raises(values): - a = pd.array(values, dtype="boolean") - msg = "The 'reduce' method is not supported" - with pytest.raises(NotImplementedError, match=msg): - np.add.reduce(a) + arr = pd.array(values, dtype="boolean") + + res = np.add.reduce(arr) + if arr[-1] is pd.NA: + expected = pd.NA + else: + expected = arr._data.sum() + tm.assert_almost_equal(res, expected) def test_value_counts_na(): diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index ff84116fa1b18..3fe869280dc2c 100644 --- a/pandas/tests/arrays/floating/test_function.py 
+++ b/pandas/tests/arrays/floating/test_function.py @@ -67,10 +67,11 @@ def test_ufuncs_binary_float(ufunc): @pytest.mark.parametrize("values", [[0, 1], [0, None]]) def test_ufunc_reduce_raises(values): - a = pd.array(values, dtype="Float64") - msg = r"The 'reduce' method is not supported." - with pytest.raises(NotImplementedError, match=msg): - np.add.reduce(a) + arr = pd.array(values, dtype="Float64") + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) @pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system") diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 596b78f8bbe77..96fe1e77f6bc5 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -79,10 +79,11 @@ def test_ufunc_binary_output(): @pytest.mark.parametrize("values", [[0, 1], [0, None]]) def test_ufunc_reduce_raises(values): - a = pd.array(values) - msg = r"The 'reduce' method is not supported." 
- with pytest.raises(NotImplementedError, match=msg): - np.add.reduce(a) + arr = pd.array(values) + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 23c432b2d10bf..44fd5ac493a8a 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -249,9 +249,8 @@ def __add__(self, other): tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1)))) -@pytest.mark.parametrize( - "values", - [ +@pytest.fixture( + params=[ pd.array([1, 3, 2], dtype=np.int64), pd.array([1, 3, 2], dtype="Int64"), pd.array([1, 3, 2], dtype="Float32"), @@ -264,41 +263,121 @@ def __add__(self, other): ], ids=lambda x: str(x.dtype), ) -@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame]) -def test_reduce(values, box, request): - # TODO: cases with NAs +def values_for_np_reduce(request): + # min/max tests assume that these are monotonic increasing + return request.param - same_type = True - if box is pd.Index: - if values.dtype.kind in ["i", "f"]: +class TestNumpyReductions: + # TODO: cases with NAs, axis kwarg for DataFrame + + def test_multiply(self, values_for_np_reduce, box_with_array, request): + box = box_with_array + values = values_for_np_reduce + + obj = box(values) + + if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index: + mark = pytest.mark.xfail(reason="SparseArray has no 'mul'") + request.node.add_marker(mark) + + if values.dtype.kind in "iuf": + result = np.multiply.reduce(obj) + if box is pd.DataFrame: + expected = obj.prod(numeric_only=False) + tm.assert_series_equal(result, expected) + elif box is pd.Index: + # Int64Index, Index has no 'prod' + expected = obj._values.prod() + assert result == expected + else: + + expected = obj.prod() + assert result == expected + else: + msg = "|".join( + [ + "does not support reduction", + 
"unsupported operand type", + "ufunc 'multiply' cannot use operands", + ] + ) + with pytest.raises(TypeError, match=msg): + np.multiply.reduce(obj) + + def test_add(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + obj = box(values) + + if values.dtype.kind in "miuf": + result = np.add.reduce(obj) + if box is pd.DataFrame: + expected = obj.sum(numeric_only=False) + tm.assert_series_equal(result, expected) + elif box is pd.Index: + # Int64Index, Index has no 'sum' + expected = obj._values.sum() + assert result == expected + else: + expected = obj.sum() + assert result == expected + else: + msg = "|".join( + [ + "does not support reduction", + "unsupported operand type", + "ufunc 'add' cannot use operands", + ] + ) + with pytest.raises(TypeError, match=msg): + np.add.reduce(obj) + + def test_max(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + same_type = True + if box is pd.Index and values.dtype.kind in ["i", "f"]: # ATM Index casts to object, so we get python ints/floats same_type = False - obj = box(values) + obj = box(values) - result = np.maximum.reduce(obj) - expected = values[1] - if box is pd.DataFrame: - # TODO: cases with axis kwarg - expected = obj.max(numeric_only=False) - tm.assert_series_equal(result, expected) - else: - assert result == expected - if same_type: - # check we have e.g. Timestamp instead of dt64 - assert type(result) == type(expected) - - result = np.minimum.reduce(obj) - expected = values[0] - if box is pd.DataFrame: - expected = obj.min(numeric_only=False) - tm.assert_series_equal(result, expected) - else: - assert result == expected - if same_type: - # check we have e.g. 
Timestamp instead of dt64 - assert type(result) == type(expected) + result = np.maximum.reduce(obj) + if box is pd.DataFrame: + # TODO: cases with axis kwarg + expected = obj.max(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + expected = values[1] + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) + + def test_min(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + same_type = True + if box is pd.Index and values.dtype.kind in ["i", "f"]: + # ATM Index casts to object, so we get python ints/floats + same_type = False + + obj = box(values) + + result = np.minimum.reduce(obj) + if box is pd.DataFrame: + expected = obj.min(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + expected = values[0] + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) @pytest.mark.parametrize("type_", [list, deque, tuple])