Skip to content

ENH: support np.add.reduce, np.multiply.reduce #44793

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,7 @@ ExtensionArray
^^^^^^^^^^^^^^
- Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`)
- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`, :issue:`23316`)
- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`)
- NumPy ufuncs ``np.minimum.reduce``, ``np.maximum.reduce``, ``np.add.reduce``, and ``np.multiply.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`, :issue:`44793`)
- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
Expand Down
12 changes: 12 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,18 @@ def index_or_series_or_array(request):
return request.param


@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__)
def box_with_array(request):
    """
    Fixture to test behavior for Index, Series, DataFrame, and pandas Array
    classes

    Parametrized over the ``Index``, ``Series`` and ``DataFrame`` classes and
    the ``pd.array`` function; a test using this fixture runs once per entry
    and receives that callable. The test id for each run is the callable's
    ``__name__``.
    """
    return request.param


# alias so we can use the same fixture for multiple parameters in a test
box_with_array2 = box_with_array


@pytest.fixture
def dict_subclass():
"""
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
# Maps a NumPy ufunc name to the name of the equivalent reduction, e.g. the
# ufunc ``add`` pairs with ``sum`` and ``multiply`` pairs with ``prod``.
# NOTE(review): presumably consulted when a ``ufunc.reduce`` call is dispatched
# to the same-named pandas reduction method -- the lookup site is not visible
# in this hunk; confirm against the caller.
REDUCTION_ALIASES = {
    "maximum": "max",
    "minimum": "min",
    "add": "sum",
    "multiply": "prod",
}


Expand Down
8 changes: 5 additions & 3 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,6 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
# For MaskedArray inputs, we apply the ufunc to ._data
# and mask the result.
if method == "reduce" and ufunc not in [np.maximum, np.minimum]:
# Not clear how to handle missing values in reductions. Raise.
raise NotImplementedError("The 'reduce' method is not supported.")

out = kwargs.get("out", ())

Expand Down Expand Up @@ -482,6 +479,11 @@ def reconstruct(x):
if ufunc.nout > 1:
# e.g. np.divmod
return tuple(reconstruct(x) for x in result)
elif method == "reduce":
# e.g. np.add.reduce; test_ufunc_reduce_raises
if self._mask.any():
return self._na_value
return result
else:
return reconstruct(result)

Expand Down
15 changes: 0 additions & 15 deletions pandas/tests/arithmetic/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,17 +221,6 @@ def mismatched_freq(request):
# ------------------------------------------------------------------


@pytest.fixture(
params=[pd.Index, pd.Series, pd.DataFrame, pd.array], ids=lambda x: x.__name__
)
def box_with_array(request):
"""
Fixture to test behavior for Index, Series, DataFrame, and pandas Array
classes
"""
return request.param


@pytest.fixture(
params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__
)
Expand All @@ -241,7 +230,3 @@ def box_1d_array(request):
classes
"""
return request.param


# alias so we can use the same fixture for multiple parameters in a test
box_with_array2 = box_with_array
12 changes: 8 additions & 4 deletions pandas/tests/arrays/boolean/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,14 @@ def test_ufunc_numeric():

@pytest.mark.parametrize("values", [[True, False], [True, None]])
def test_ufunc_reduce_raises(values):
a = pd.array(values, dtype="boolean")
msg = "The 'reduce' method is not supported"
with pytest.raises(NotImplementedError, match=msg):
np.add.reduce(a)
arr = pd.array(values, dtype="boolean")

res = np.add.reduce(arr)
if arr[-1] is pd.NA:
expected = pd.NA
else:
expected = arr._data.sum()
tm.assert_almost_equal(res, expected)


def test_value_counts_na():
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/arrays/floating/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ def test_ufuncs_binary_float(ufunc):

@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
a = pd.array(values, dtype="Float64")
msg = r"The 'reduce' method is not supported."
with pytest.raises(NotImplementedError, match=msg):
np.add.reduce(a)
arr = pd.array(values, dtype="Float64")

res = np.add.reduce(arr)
expected = arr.sum(skipna=False)
tm.assert_almost_equal(res, expected)


@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/arrays/integer/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,11 @@ def test_ufunc_binary_output():

@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
a = pd.array(values)
msg = r"The 'reduce' method is not supported."
with pytest.raises(NotImplementedError, match=msg):
np.add.reduce(a)
arr = pd.array(values)

res = np.add.reduce(arr)
expected = arr.sum(skipna=False)
tm.assert_almost_equal(res, expected)


@pytest.mark.parametrize(
Expand Down
143 changes: 111 additions & 32 deletions pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,8 @@ def __add__(self, other):
tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))


@pytest.mark.parametrize(
"values",
[
@pytest.fixture(
params=[
pd.array([1, 3, 2], dtype=np.int64),
pd.array([1, 3, 2], dtype="Int64"),
pd.array([1, 3, 2], dtype="Float32"),
Expand All @@ -264,41 +263,121 @@ def __add__(self, other):
],
ids=lambda x: str(x.dtype),
)
@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame])
def test_reduce(values, box, request):
# TODO: cases with NAs
def values_for_np_reduce(request):
# min/max tests assume that the minimum is at position 0 and the maximum is at position 1
return request.param

same_type = True

if box is pd.Index:
if values.dtype.kind in ["i", "f"]:
class TestNumpyReductions:
# TODO: cases with NAs, axis kwarg for DataFrame

def test_multiply(self, values_for_np_reduce, box_with_array, request):
    # np.multiply.reduce should agree with the box's own ``prod`` when the
    # values have a signed-int, unsigned-int or float dtype kind ("iuf"),
    # and should raise TypeError for every other dtype kind.
    box = box_with_array
    values = values_for_np_reduce

    obj = box(values)

    if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index:
        # sparse values in any non-Index box are marked as an expected failure
        mark = pytest.mark.xfail(reason="SparseArray has no 'mul'")
        request.node.add_marker(mark)

    if values.dtype.kind in "iuf":
        result = np.multiply.reduce(obj)
        if box is pd.DataFrame:
            # the DataFrame result is compared as a Series
            expected = obj.prod(numeric_only=False)
            tm.assert_series_equal(result, expected)
        elif box is pd.Index:
            # Int64Index, Index has no 'prod'
            # so the expected value comes from the underlying values instead
            expected = obj._values.prod()
            assert result == expected
        else:

            expected = obj.prod()
            assert result == expected
    else:
        # any one of these messages is accepted; they are joined into a
        # single regex alternation for ``match``
        msg = "|".join(
            [
                "does not support reduction",
                "unsupported operand type",
                "ufunc 'multiply' cannot use operands",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            np.multiply.reduce(obj)

def test_add(self, values_for_np_reduce, box_with_array):
    # np.add.reduce should agree with the box's own ``sum`` when the values
    # have a timedelta, signed-int, unsigned-int or float dtype kind
    # ("miuf"), and should raise TypeError for every other dtype kind.
    box = box_with_array
    values = values_for_np_reduce

    obj = box(values)

    if values.dtype.kind in "miuf":
        result = np.add.reduce(obj)
        if box is pd.DataFrame:
            # the DataFrame result is compared as a Series
            expected = obj.sum(numeric_only=False)
            tm.assert_series_equal(result, expected)
        elif box is pd.Index:
            # Int64Index, Index has no 'sum'
            # so the expected value comes from the underlying values instead
            expected = obj._values.sum()
            assert result == expected
        else:
            expected = obj.sum()
            assert result == expected
    else:
        # any one of these messages is accepted; they are joined into a
        # single regex alternation for ``match``
        msg = "|".join(
            [
                "does not support reduction",
                "unsupported operand type",
                "ufunc 'add' cannot use operands",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            np.add.reduce(obj)

def test_max(self, values_for_np_reduce, box_with_array):
box = box_with_array
values = values_for_np_reduce

same_type = True
if box is pd.Index and values.dtype.kind in ["i", "f"]:
# ATM Index casts to object, so we get python ints/floats
same_type = False

obj = box(values)
obj = box(values)

result = np.maximum.reduce(obj)
expected = values[1]
if box is pd.DataFrame:
# TODO: cases with axis kwarg
expected = obj.max(numeric_only=False)
tm.assert_series_equal(result, expected)
else:
assert result == expected
if same_type:
# check we have e.g. Timestamp instead of dt64
assert type(result) == type(expected)

result = np.minimum.reduce(obj)
expected = values[0]
if box is pd.DataFrame:
expected = obj.min(numeric_only=False)
tm.assert_series_equal(result, expected)
else:
assert result == expected
if same_type:
# check we have e.g. Timestamp instead of dt64
assert type(result) == type(expected)
result = np.maximum.reduce(obj)
if box is pd.DataFrame:
# TODO: cases with axis kwarg
expected = obj.max(numeric_only=False)
tm.assert_series_equal(result, expected)
else:
expected = values[1]
assert result == expected
if same_type:
# check we have e.g. Timestamp instead of dt64
assert type(result) == type(expected)

def test_min(self, values_for_np_reduce, box_with_array):
    # np.minimum.reduce should return the smallest element, which this test
    # takes to be ``values[0]``; for a DataFrame the result is compared
    # against ``DataFrame.min`` instead.
    box = box_with_array
    values = values_for_np_reduce

    same_type = True
    if box is pd.Index and values.dtype.kind in ["i", "f"]:
        # ATM Index casts to object, so we get python ints/floats
        same_type = False

    obj = box(values)

    result = np.minimum.reduce(obj)
    if box is pd.DataFrame:
        # the DataFrame result is compared as a Series
        expected = obj.min(numeric_only=False)
        tm.assert_series_equal(result, expected)
    else:
        expected = values[0]
        assert result == expected
        if same_type:
            # check we have e.g. Timestamp instead of dt64
            assert type(result) == type(expected)


@pytest.mark.parametrize("type_", [list, deque, tuple])
Expand Down