ENH: support np.add.reduce, np.multiply.reduce (#44793)

jbrockmendel · web-flow · commit ac79b7cba6bf · 2021-12-29T11:13:54.000-05:00
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -933,7 +933,7 @@ ExtensionArray
 ^^^^^^^^^^^^^^
 - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`)
 - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`, :issue:`23316`)
-- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`)
+- NumPy ufuncs ``np.minimum.reduce``, ``np.maximum.reduce``, ``np.add.reduce``, and ``np.multiply.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`, :issue:`44793`)
 - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
 - Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
 - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -404,6 +404,18 @@ def index_or_series_or_array(request):
     return request.param
 
 
+@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__)
+def box_with_array(request):
+    """
+    Fixture to test behavior for Index, Series, DataFrame, and pandas Array
+    classes
+    """
+    return request.param
+
+
+box_with_array2 = box_with_array
+
+
 @pytest.fixture
 def dict_subclass():
     """
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
@@ -25,6 +25,8 @@
 REDUCTION_ALIASES = {
     "maximum": "max",
     "minimum": "min",
+    "add": "sum",
+    "multiply": "prod",
 }
 
 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -419,9 +419,6 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
     def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
         # For MaskedArray inputs, we apply the ufunc to ._data
         # and mask the result.
-        if method == "reduce" and ufunc not in [np.maximum, np.minimum]:
-            # Not clear how to handle missing values in reductions. Raise.
-            raise NotImplementedError("The 'reduce' method is not supported.")
 
         out = kwargs.get("out", ())
 
@@ -482,6 +479,11 @@ def reconstruct(x):
         if ufunc.nout > 1:
             # e.g. np.divmod
             return tuple(reconstruct(x) for x in result)
+        elif method == "reduce":
+            # e.g. np.add.reduce (test_ufunc_reduce_raises); NA if any value is masked
+            if self._mask.any():
+                return self._na_value
+            return result
         else:
             return reconstruct(result)
 
diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py
@@ -221,17 +221,6 @@ def mismatched_freq(request):
 # ------------------------------------------------------------------
 
 
-@pytest.fixture(
-    params=[pd.Index, pd.Series, pd.DataFrame, pd.array], ids=lambda x: x.__name__
-)
-def box_with_array(request):
-    """
-    Fixture to test behavior for Index, Series, DataFrame, and pandas Array
-    classes
-    """
-    return request.param
-
-
 @pytest.fixture(
     params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__
 )
@@ -241,7 +230,3 @@ def box_1d_array(request):
     classes
     """
     return request.param
-
-
-# alias so we can use the same fixture for multiple parameters in a test
-box_with_array2 = box_with_array
diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py
@@ -79,10 +79,14 @@ def test_ufunc_numeric():
 
 @pytest.mark.parametrize("values", [[True, False], [True, None]])
 def test_ufunc_reduce_raises(values):
-    a = pd.array(values, dtype="boolean")
-    msg = "The 'reduce' method is not supported"
-    with pytest.raises(NotImplementedError, match=msg):
-        np.add.reduce(a)
+    arr = pd.array(values, dtype="boolean")
+
+    res = np.add.reduce(arr)
+    if arr[-1] is pd.NA:
+        expected = pd.NA
+    else:
+        expected = arr._data.sum()
+    tm.assert_almost_equal(res, expected)
 
 
 def test_value_counts_na():
diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py
@@ -67,10 +67,11 @@ def test_ufuncs_binary_float(ufunc):
 
 @pytest.mark.parametrize("values", [[0, 1], [0, None]])
 def test_ufunc_reduce_raises(values):
-    a = pd.array(values, dtype="Float64")
-    msg = r"The 'reduce' method is not supported."
-    with pytest.raises(NotImplementedError, match=msg):
-        np.add.reduce(a)
+    arr = pd.array(values, dtype="Float64")
+
+    res = np.add.reduce(arr)
+    expected = arr.sum(skipna=False)
+    tm.assert_almost_equal(res, expected)
 
 
 @pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")
diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py
@@ -79,10 +79,11 @@ def test_ufunc_binary_output():
 
 @pytest.mark.parametrize("values", [[0, 1], [0, None]])
 def test_ufunc_reduce_raises(values):
-    a = pd.array(values)
-    msg = r"The 'reduce' method is not supported."
-    with pytest.raises(NotImplementedError, match=msg):
-        np.add.reduce(a)
+    arr = pd.array(values)
+
+    res = np.add.reduce(arr)
+    expected = arr.sum(skipna=False)
+    tm.assert_almost_equal(res, expected)
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py
@@ -249,9 +249,8 @@ def __add__(self, other):
     tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))
 
 
-@pytest.mark.parametrize(
-    "values",
-    [
+@pytest.fixture(
+    params=[
         pd.array([1, 3, 2], dtype=np.int64),
         pd.array([1, 3, 2], dtype="Int64"),
         pd.array([1, 3, 2], dtype="Float32"),
@@ -264,41 +263,121 @@ def __add__(self, other):
     ],
     ids=lambda x: str(x.dtype),
 )
-@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame])
-def test_reduce(values, box, request):
-    # TODO: cases with NAs
+def values_for_np_reduce(request):
+    # min/max tests assume that these are monotonic increasing
+    return request.param
 
-    same_type = True
 
-    if box is pd.Index:
-        if values.dtype.kind in ["i", "f"]:
+class TestNumpyReductions:
+    # TODO: cases with NAs, axis kwarg for DataFrame
+
+    def test_multiply(self, values_for_np_reduce, box_with_array, request):
+        box = box_with_array
+        values = values_for_np_reduce
+
+        obj = box(values)
+
+        if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index:
+            mark = pytest.mark.xfail(reason="SparseArray has no 'mul'")
+            request.node.add_marker(mark)
+
+        if values.dtype.kind in "iuf":
+            result = np.multiply.reduce(obj)
+            if box is pd.DataFrame:
+                expected = obj.prod(numeric_only=False)
+                tm.assert_series_equal(result, expected)
+            elif box is pd.Index:
+                # Int64Index, Index has no 'prod'
+                expected = obj._values.prod()
+                assert result == expected
+            else:
+
+                expected = obj.prod()
+                assert result == expected
+        else:
+            msg = "|".join(
+                [
+                    "does not support reduction",
+                    "unsupported operand type",
+                    "ufunc 'multiply' cannot use operands",
+                ]
+            )
+            with pytest.raises(TypeError, match=msg):
+                np.multiply.reduce(obj)
+
+    def test_add(self, values_for_np_reduce, box_with_array):
+        box = box_with_array
+        values = values_for_np_reduce
+
+        obj = box(values)
+
+        if values.dtype.kind in "miuf":
+            result = np.add.reduce(obj)
+            if box is pd.DataFrame:
+                expected = obj.sum(numeric_only=False)
+                tm.assert_series_equal(result, expected)
+            elif box is pd.Index:
+                # Int64Index, Index has no 'sum'
+                expected = obj._values.sum()
+                assert result == expected
+            else:
+                expected = obj.sum()
+                assert result == expected
+        else:
+            msg = "|".join(
+                [
+                    "does not support reduction",
+                    "unsupported operand type",
+                    "ufunc 'add' cannot use operands",
+                ]
+            )
+            with pytest.raises(TypeError, match=msg):
+                np.add.reduce(obj)
+
+    def test_max(self, values_for_np_reduce, box_with_array):
+        box = box_with_array
+        values = values_for_np_reduce
+
+        same_type = True
+        if box is pd.Index and values.dtype.kind in ["i", "f"]:
             # ATM Index casts to object, so we get python ints/floats
             same_type = False
 
-    obj = box(values)
+        obj = box(values)
 
-    result = np.maximum.reduce(obj)
-    expected = values[1]
-    if box is pd.DataFrame:
-        # TODO: cases with axis kwarg
-        expected = obj.max(numeric_only=False)
-        tm.assert_series_equal(result, expected)
-    else:
-        assert result == expected
-        if same_type:
-            # check we have e.g. Timestamp instead of dt64
-            assert type(result) == type(expected)
-
-    result = np.minimum.reduce(obj)
-    expected = values[0]
-    if box is pd.DataFrame:
-        expected = obj.min(numeric_only=False)
-        tm.assert_series_equal(result, expected)
-    else:
-        assert result == expected
-        if same_type:
-            # check we have e.g. Timestamp instead of dt64
-            assert type(result) == type(expected)
+        result = np.maximum.reduce(obj)
+        if box is pd.DataFrame:
+            # TODO: cases with axis kwarg
+            expected = obj.max(numeric_only=False)
+            tm.assert_series_equal(result, expected)
+        else:
+            expected = values[1]
+            assert result == expected
+            if same_type:
+                # check we have e.g. Timestamp instead of dt64
+                assert type(result) == type(expected)
+
+    def test_min(self, values_for_np_reduce, box_with_array):
+        box = box_with_array
+        values = values_for_np_reduce
+
+        same_type = True
+        if box is pd.Index and values.dtype.kind in ["i", "f"]:
+            # ATM Index casts to object, so we get python ints/floats
+            same_type = False
+
+        obj = box(values)
+
+        result = np.minimum.reduce(obj)
+        if box is pd.DataFrame:
+            expected = obj.min(numeric_only=False)
+            tm.assert_series_equal(result, expected)
+        else:
+            expected = values[0]
+            assert result == expected
+            if same_type:
+                # check we have e.g. Timestamp instead of dt64
+                assert type(result) == type(expected)
 
 
 @pytest.mark.parametrize("type_", [list, deque, tuple])

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,8 @@`
`25`	`25`	`REDUCTION_ALIASES = {`
`26`	`26`	`"maximum": "max",`
`27`	`27`	`"minimum": "min",`
	`28`	`+ "add": "sum",`
	`29`	`+ "multiply": "prod",`
`28`	`30`	`}`
`29`	`31`
`30`	`32`