Skip to content

Commit ac79b7c

Browse files
authored
ENH: support np.add.reduce, np.multiply.reduce (#44793)
1 parent af76bd5 commit ac79b7c

File tree

9 files changed

+149
-63
lines changed

9 files changed

+149
-63
lines changed

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,7 @@ ExtensionArray
933933
^^^^^^^^^^^^^^
934934
- Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`)
935935
- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`, :issue:`23316`)
936-
- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`)
936+
- NumPy ufuncs ``np.minimum.reduce``, ``np.maximum.reduce``, ``np.add.reduce``, and ``np.multiply.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`, :issue:`44793`)
937937
- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
938938
- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
939939
- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)

pandas/conftest.py

+12
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,18 @@ def index_or_series_or_array(request):
404404
return request.param
405405

406406

407+
@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__)
408+
def box_with_array(request):
409+
"""
410+
Fixture to test behavior for Index, Series, DataFrame, and pandas Array
411+
classes
412+
"""
413+
return request.param
414+
415+
416+
box_with_array2 = box_with_array
417+
418+
407419
@pytest.fixture
408420
def dict_subclass():
409421
"""

pandas/core/arraylike.py

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
REDUCTION_ALIASES = {
2626
"maximum": "max",
2727
"minimum": "min",
28+
"add": "sum",
29+
"multiply": "prod",
2830
}
2931

3032

pandas/core/arrays/masked.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -419,9 +419,6 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
419419
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
420420
# For MaskedArray inputs, we apply the ufunc to ._data
421421
# and mask the result.
422-
if method == "reduce" and ufunc not in [np.maximum, np.minimum]:
423-
# Not clear how to handle missing values in reductions. Raise.
424-
raise NotImplementedError("The 'reduce' method is not supported.")
425422

426423
out = kwargs.get("out", ())
427424

@@ -482,6 +479,11 @@ def reconstruct(x):
482479
if ufunc.nout > 1:
483480
# e.g. np.divmod
484481
return tuple(reconstruct(x) for x in result)
482+
elif method == "reduce":
483+
# e.g. np.add.reduce; test_ufunc_reduce_raises
484+
if self._mask.any():
485+
return self._na_value
486+
return result
485487
else:
486488
return reconstruct(result)
487489

pandas/tests/arithmetic/conftest.py

-15
Original file line numberDiff line numberDiff line change
@@ -221,17 +221,6 @@ def mismatched_freq(request):
221221
# ------------------------------------------------------------------
222222

223223

224-
@pytest.fixture(
225-
params=[pd.Index, pd.Series, pd.DataFrame, pd.array], ids=lambda x: x.__name__
226-
)
227-
def box_with_array(request):
228-
"""
229-
Fixture to test behavior for Index, Series, DataFrame, and pandas Array
230-
classes
231-
"""
232-
return request.param
233-
234-
235224
@pytest.fixture(
236225
params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__
237226
)
@@ -241,7 +230,3 @@ def box_1d_array(request):
241230
classes
242231
"""
243232
return request.param
244-
245-
246-
# alias so we can use the same fixture for multiple parameters in a test
247-
box_with_array2 = box_with_array

pandas/tests/arrays/boolean/test_function.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,14 @@ def test_ufunc_numeric():
7979

8080
@pytest.mark.parametrize("values", [[True, False], [True, None]])
8181
def test_ufunc_reduce_raises(values):
82-
a = pd.array(values, dtype="boolean")
83-
msg = "The 'reduce' method is not supported"
84-
with pytest.raises(NotImplementedError, match=msg):
85-
np.add.reduce(a)
82+
arr = pd.array(values, dtype="boolean")
83+
84+
res = np.add.reduce(arr)
85+
if arr[-1] is pd.NA:
86+
expected = pd.NA
87+
else:
88+
expected = arr._data.sum()
89+
tm.assert_almost_equal(res, expected)
8690

8791

8892
def test_value_counts_na():

pandas/tests/arrays/floating/test_function.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,11 @@ def test_ufuncs_binary_float(ufunc):
6767

6868
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
6969
def test_ufunc_reduce_raises(values):
70-
a = pd.array(values, dtype="Float64")
71-
msg = r"The 'reduce' method is not supported."
72-
with pytest.raises(NotImplementedError, match=msg):
73-
np.add.reduce(a)
70+
arr = pd.array(values, dtype="Float64")
71+
72+
res = np.add.reduce(arr)
73+
expected = arr.sum(skipna=False)
74+
tm.assert_almost_equal(res, expected)
7475

7576

7677
@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")

pandas/tests/arrays/integer/test_function.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,11 @@ def test_ufunc_binary_output():
7979

8080
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
8181
def test_ufunc_reduce_raises(values):
82-
a = pd.array(values)
83-
msg = r"The 'reduce' method is not supported."
84-
with pytest.raises(NotImplementedError, match=msg):
85-
np.add.reduce(a)
82+
arr = pd.array(values)
83+
84+
res = np.add.reduce(arr)
85+
expected = arr.sum(skipna=False)
86+
tm.assert_almost_equal(res, expected)
8687

8788

8889
@pytest.mark.parametrize(

pandas/tests/series/test_ufunc.py

+111-32
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,8 @@ def __add__(self, other):
249249
tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))
250250

251251

252-
@pytest.mark.parametrize(
253-
"values",
254-
[
252+
@pytest.fixture(
253+
params=[
255254
pd.array([1, 3, 2], dtype=np.int64),
256255
pd.array([1, 3, 2], dtype="Int64"),
257256
pd.array([1, 3, 2], dtype="Float32"),
@@ -264,41 +263,121 @@ def __add__(self, other):
264263
],
265264
ids=lambda x: str(x.dtype),
266265
)
267-
@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame])
268-
def test_reduce(values, box, request):
269-
# TODO: cases with NAs
266+
def values_for_np_reduce(request):
267+
# min/max tests assume that the minimum is at position 0 and the maximum is at position 1
268+
return request.param
270269

271-
same_type = True
272270

273-
if box is pd.Index:
274-
if values.dtype.kind in ["i", "f"]:
271+
class TestNumpyReductions:
272+
# TODO: cases with NAs, axis kwarg for DataFrame
273+
274+
def test_multiply(self, values_for_np_reduce, box_with_array, request):
275+
box = box_with_array
276+
values = values_for_np_reduce
277+
278+
obj = box(values)
279+
280+
if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index:
281+
mark = pytest.mark.xfail(reason="SparseArray has no 'mul'")
282+
request.node.add_marker(mark)
283+
284+
if values.dtype.kind in "iuf":
285+
result = np.multiply.reduce(obj)
286+
if box is pd.DataFrame:
287+
expected = obj.prod(numeric_only=False)
288+
tm.assert_series_equal(result, expected)
289+
elif box is pd.Index:
290+
# Int64Index, Index has no 'prod'
291+
expected = obj._values.prod()
292+
assert result == expected
293+
else:
294+
295+
expected = obj.prod()
296+
assert result == expected
297+
else:
298+
msg = "|".join(
299+
[
300+
"does not support reduction",
301+
"unsupported operand type",
302+
"ufunc 'multiply' cannot use operands",
303+
]
304+
)
305+
with pytest.raises(TypeError, match=msg):
306+
np.multiply.reduce(obj)
307+
308+
def test_add(self, values_for_np_reduce, box_with_array):
309+
box = box_with_array
310+
values = values_for_np_reduce
311+
312+
obj = box(values)
313+
314+
if values.dtype.kind in "miuf":
315+
result = np.add.reduce(obj)
316+
if box is pd.DataFrame:
317+
expected = obj.sum(numeric_only=False)
318+
tm.assert_series_equal(result, expected)
319+
elif box is pd.Index:
320+
# Int64Index, Index has no 'sum'
321+
expected = obj._values.sum()
322+
assert result == expected
323+
else:
324+
expected = obj.sum()
325+
assert result == expected
326+
else:
327+
msg = "|".join(
328+
[
329+
"does not support reduction",
330+
"unsupported operand type",
331+
"ufunc 'add' cannot use operands",
332+
]
333+
)
334+
with pytest.raises(TypeError, match=msg):
335+
np.add.reduce(obj)
336+
337+
def test_max(self, values_for_np_reduce, box_with_array):
338+
box = box_with_array
339+
values = values_for_np_reduce
340+
341+
same_type = True
342+
if box is pd.Index and values.dtype.kind in ["i", "f"]:
275343
# ATM Index casts to object, so we get python ints/floats
276344
same_type = False
277345

278-
obj = box(values)
346+
obj = box(values)
279347

280-
result = np.maximum.reduce(obj)
281-
expected = values[1]
282-
if box is pd.DataFrame:
283-
# TODO: cases with axis kwarg
284-
expected = obj.max(numeric_only=False)
285-
tm.assert_series_equal(result, expected)
286-
else:
287-
assert result == expected
288-
if same_type:
289-
# check we have e.g. Timestamp instead of dt64
290-
assert type(result) == type(expected)
291-
292-
result = np.minimum.reduce(obj)
293-
expected = values[0]
294-
if box is pd.DataFrame:
295-
expected = obj.min(numeric_only=False)
296-
tm.assert_series_equal(result, expected)
297-
else:
298-
assert result == expected
299-
if same_type:
300-
# check we have e.g. Timestamp instead of dt64
301-
assert type(result) == type(expected)
348+
result = np.maximum.reduce(obj)
349+
if box is pd.DataFrame:
350+
# TODO: cases with axis kwarg
351+
expected = obj.max(numeric_only=False)
352+
tm.assert_series_equal(result, expected)
353+
else:
354+
expected = values[1]
355+
assert result == expected
356+
if same_type:
357+
# check we have e.g. Timestamp instead of dt64
358+
assert type(result) == type(expected)
359+
360+
def test_min(self, values_for_np_reduce, box_with_array):
361+
box = box_with_array
362+
values = values_for_np_reduce
363+
364+
same_type = True
365+
if box is pd.Index and values.dtype.kind in ["i", "f"]:
366+
# ATM Index casts to object, so we get python ints/floats
367+
same_type = False
368+
369+
obj = box(values)
370+
371+
result = np.minimum.reduce(obj)
372+
if box is pd.DataFrame:
373+
expected = obj.min(numeric_only=False)
374+
tm.assert_series_equal(result, expected)
375+
else:
376+
expected = values[0]
377+
assert result == expected
378+
if same_type:
379+
# check we have e.g. Timestamp instead of dt64
380+
assert type(result) == type(expected)
302381

303382

304383
@pytest.mark.parametrize("type_", [list, deque, tuple])

0 commit comments

Comments
 (0)