ENH: implement ExtensionArray.__array_ufunc__ (pandas-dev#43899)

jbrockmendel · rhshadrach · commit cdc7b4a86a1e · 2021-10-10T00:40:12.000-04:00
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -522,7 +522,7 @@ Sparse
 
 ExtensionArray
 ^^^^^^^^^^^^^^
--
+- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__``, ``__pos__``, ``__neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`)
 -
 
 Styler
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
@@ -371,6 +371,8 @@ def reconstruct(result):
         # * len(inputs) > 1 is doable when we know that we have
         #   aligned blocks / dtypes.
         inputs = tuple(np.asarray(x) for x in inputs)
+        # Note: we can't use default_array_ufunc here because reindexing means
+        #  that `self` may not be among `inputs`
         result = getattr(ufunc, method)(*inputs, **kwargs)
     elif self.ndim == 1:
         # ufunc(series, ...)
@@ -387,7 +389,7 @@ def reconstruct(result):
         else:
             # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
             # Those can have an axis keyword and thus can't be called block-by-block
-            result = getattr(ufunc, method)(np.asarray(inputs[0]), **kwargs)
+            result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
 
     result = reconstruct(result)
     return result
@@ -452,3 +454,19 @@ def _assign_where(out, result, where) -> None:
         out[:] = result
     else:
         np.putmask(out, where, result)
+
+
+def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+    """
+    Fallback to the behavior we would get if we did not define __array_ufunc__.
+
+    Notes
+    -----
+    `self` must be among `inputs`; otherwise NotImplementedError is raised.
+    """
+    if not any(x is self for x in inputs):
+        raise NotImplementedError
+
+    new_inputs = [x if x is not self else np.asarray(x) for x in inputs]
+
+    return getattr(ufunc, method)(*new_inputs, **kwargs)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -65,6 +65,7 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import (
+    arraylike,
     missing,
     ops,
 )
@@ -1366,6 +1367,20 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
             )
         return result
 
+    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+        if any(
+            isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
+        ):
+            return NotImplemented
+
+        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
+            self, ufunc, method, *inputs, **kwargs
+        )
+        if result is not NotImplemented:
+            return result
+
+        return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
+
 
 class ExtensionOpsMixin:
     """
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -604,3 +604,6 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
         else:
             result[mask] = np.nan
             return result
+
+    def __abs__(self):
+        return self.copy()
diff --git a/pandas/tests/arrays/boolean/test_ops.py b/pandas/tests/arrays/boolean/test_ops.py
@@ -18,3 +18,10 @@ def test_invert(self):
             {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"]
         )
         tm.assert_frame_equal(result, expected)
+
+    def test_abs(self):
+        # matching numpy behavior, abs is the identity function
+        arr = pd.array([True, False, None], dtype="boolean")
+        result = abs(arr)
+
+        tm.assert_extension_array_equal(result, arr)
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
@@ -90,6 +90,19 @@ def test_abs(self):
         result = abs(arr)
         tm.assert_timedelta_array_equal(result, expected)
 
+        result2 = np.abs(arr)
+        tm.assert_timedelta_array_equal(result2, expected)
+
+    def test_pos(self):
+        vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
+        arr = TimedeltaArray(vals)
+
+        result = +arr
+        tm.assert_timedelta_array_equal(result, arr)
+
+        result2 = np.positive(arr)
+        tm.assert_timedelta_array_equal(result2, arr)
+
     def test_neg(self):
         vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]")
         arr = TimedeltaArray(vals)
@@ -100,6 +113,9 @@ def test_neg(self):
         result = -arr
         tm.assert_timedelta_array_equal(result, expected)
 
+        result2 = np.negative(arr)
+        tm.assert_timedelta_array_equal(result2, expected)
+
     def test_neg_freq(self):
         tdi = pd.timedelta_range("2 Days", periods=4, freq="H")
         arr = TimedeltaArray(tdi, freq=tdi.freq)
@@ -108,3 +124,6 @@ def test_neg_freq(self):
 
         result = -arr
         tm.assert_timedelta_array_equal(result, expected)
+
+        result2 = np.negative(arr)
+        tm.assert_timedelta_array_equal(result2, expected)
diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py
@@ -54,7 +54,10 @@ def test_view(self, data):
         # __setitem__ does not work, so we only have a smoke-test
         data.view()
 
-    @pytest.mark.xfail(raises=AssertionError, reason="Not implemented yet")
+    @pytest.mark.xfail(
+        raises=AttributeError,
+        reason="__eq__ incorrectly returns bool instead of ndarray[bool]",
+    )
     def test_contains(self, data, data_missing):
         super().test_contains(data, data_missing)
 
diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import numpy as np
 import pytest
 
 import pandas as pd
@@ -128,11 +129,13 @@ class BaseComparisonOpsTests(BaseOpsUtil):
     """Various Series and DataFrame comparison ops methods."""
 
     def _compare_other(self, s, data, op_name, other):
+
         op = self.get_op_from_name(op_name)
-        if op_name == "__eq__":
-            assert not op(s, other).all()
-        elif op_name == "__ne__":
-            assert op(s, other).all()
+        if op_name in ["__eq__", "__ne__"]:
+            # comparison should match point-wise comparisons
+            result = op(s, other)
+            expected = s.combine(other, op)
+            self.assert_series_equal(result, expected)
 
         else:
 
@@ -182,3 +185,24 @@ def test_invert(self, data):
         result = ~s
         expected = pd.Series(~data, name="name")
         self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs])
+    def test_unary_ufunc_dunder_equivalence(self, data, ufunc):
+        # the dunder __pos__ works if and only if np.positive works,
+        #  same for __neg__/np.negative and __abs__/np.abs
+        attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[
+            ufunc
+        ]
+
+        exc = None
+        try:
+            result = getattr(data, attr)()
+        except Exception as err:
+            exc = err
+
+            # if the dunder method raised, then so should the ufunc
+            with pytest.raises((type(exc), TypeError)):
+                ufunc(data)
+        else:
+            alt = ufunc(data)
+            self.assert_extension_array_equal(result, alt)

Original file line number	Diff line number	Diff line change
`@@ -522,7 +522,7 @@ Sparse`
`522`	`522`
`523`	`523`	`ExtensionArray`
`524`	`524`	`^^^^^^^^^^^^^^`
`525`		`--`
	`525`	+- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`)
`526`	`526`	`-`
`527`	`527`
`528`	`528`	`Styler`