From fdb5bb4253d75d14ddbc681b783269bc849bb373 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 5 Oct 2021 19:15:09 -0700 Subject: [PATCH 1/7] ENH: implement ExtensionArray.__array_ufunc__ --- pandas/core/arraylike.py | 20 +++++++++++++- pandas/core/arrays/base.py | 15 +++++++++++ pandas/core/arrays/boolean.py | 3 +++ pandas/tests/arrays/boolean/test_ops.py | 7 +++++ pandas/tests/extension/arrow/test_bool.py | 5 +++- pandas/tests/extension/base/ops.py | 32 ++++++++++++++++++++--- 6 files changed, 76 insertions(+), 6 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index f114278caf3ee..3d209189d97d8 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -371,6 +371,8 @@ def reconstruct(result): # * len(inputs) > 1 is doable when we know that we have # aligned blocks / dtypes. inputs = tuple(np.asarray(x) for x in inputs) + # Note: we can't use default_array_ufunc here bc reindexing means + # that `self` may not be among `inputs` result = getattr(ufunc, method)(*inputs, **kwargs) elif self.ndim == 1: # ufunc(series, ...) @@ -387,7 +389,7 @@ def reconstruct(result): else: # otherwise specific ufunc methods (eg np..accumulate(..)) # Those can have an axis keyword and thus can't be called block-by-block - result = getattr(ufunc, method)(np.asarray(inputs[0]), **kwargs) + result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) result = reconstruct(result) return result @@ -452,3 +454,19 @@ def _assign_where(out, result, where) -> None: out[:] = result else: np.putmask(out, where, result) + + +def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Fallback to the behavior we would get if we did not define __array_ufunc__. + + Notes + ----- + We are assuming that `self` is among `inputs`. + """ + if not any(x is self for x in inputs): + raise NotImplementedError + + new_inputs = [x if x is not self else np.asarray(x) for x in inputs] + + return getattr(ufunc, method)(*new_inputs, **kwargs) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8ee5a4a2d913a..b17f309e5f9fb 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -65,6 +65,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ( + arraylike, missing, ops, ) @@ -1366,6 +1367,20 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype): ) return result + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if any( + isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs + ): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) + class ExtensionOpsMixin: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 69896a389102f..1df7c191bdb68 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -604,3 +604,6 @@ def _maybe_mask_result(self, result, mask, other, op_name: str): else: result[mask] = np.nan return result + + def __abs__(self): + return self.copy() diff --git a/pandas/tests/arrays/boolean/test_ops.py b/pandas/tests/arrays/boolean/test_ops.py index 52f602258a049..95ebe8528c2e5 100644 --- a/pandas/tests/arrays/boolean/test_ops.py +++ b/pandas/tests/arrays/boolean/test_ops.py @@ -18,3 +18,10 @@ def test_invert(self): {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] ) tm.assert_frame_equal(result, expected) + + def test_abs(self): + # matching numpy behavior, abs is the identity function + arr = pd.array([True, False, None], dtype="boolean") + result = abs(arr) + + tm.assert_extension_array_equal(result, arr) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 6a16433aa0a32..d262f09182a9c 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -54,7 +54,10 @@ def test_view(self, data): # __setitem__ does not work, so we only have a smoke-test data.view() - @pytest.mark.xfail(raises=AssertionError, reason="Not implemented yet") + @pytest.mark.xfail( + raises=AttributeError, + reason="__eq__ incorrectly returns bool instead of ndarray[bool]", + ) def test_contains(self, data, data_missing): super().test_contains(data, data_missing) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index ca22973d0b4d3..e9ceec3a3d7e6 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pytest import pandas as pd @@ -128,11 +129,13 @@ class BaseComparisonOpsTests(BaseOpsUtil): """Various Series and DataFrame comparison ops methods.""" def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) - if op_name == "__eq__": - assert not op(s, other).all() - elif op_name == "__ne__": - assert op(s, other).all() + if op_name in ["__eq__", "__ne__"]: + # comparison should match point-wise comparisons + result = op(s, other) + expected = s.combine(other, op) + self.assert_series_equal(result, expected) else: @@ -182,3 +185,24 @@ def test_invert(self, data): result = ~s expected = pd.Series(~data, name="name") self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) + def test_unary_ufunc_dunder_equivalence(self, data, ufunc): + # the dunder __pos__ works if and only if np.positive works, + # same for __neg__/np.negative and __abs__/np.abs + attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[ + ufunc + ] + + exc = None + try: + result = getattr(data, attr)() + except Exception as err: + exc = err + + # if __pos__ raised, then so should the ufunc + with pytest.raises((type(exc), TypeError)): + ufunc(data) + else: + alt = ufunc(data) + self.assert_extension_array_equal(result, alt) From e46e05b0a09af3797a5b0adb9d27aa99564d1d0c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 5 Oct 2021 19:26:15 -0700 Subject: [PATCH 2/7] tests for TimedeltaArray --- pandas/tests/arrays/test_timedeltas.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 9e2b8e0f1603e..98329776242f1 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -90,6 +90,19 @@ def test_abs(self): result = abs(arr) tm.assert_timedelta_array_equal(result, expected) + result2 = np.abs(arr) + tm.assert_timedelta_array_equal(result2, expected) + + def test_pos(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + result = +arr + tm.assert_timedelta_array_equal(result, arr) + + result2 = np.positive(arr) + tm.assert_timedelta_array_equal(result2, arr) + def test_neg(self): vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") arr = TimedeltaArray(vals) @@ -100,6 +113,9 @@ def test_neg(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) + def test_neg_freq(self): tdi = pd.timedelta_range("2 Days", periods=4, freq="H") arr = TimedeltaArray(tdi, freq=tdi.freq) @@ -108,3 +124,6 @@ def test_neg_freq(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) From ec3100c8338b5441c5d5bc6cbcf61a30db91e44e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 6 Oct 2021 16:05:35 -0700 Subject: [PATCH 3/7] ENH: implement Index.__array_ufunc__ --- pandas/core/arraylike.py | 4 ++-- pandas/core/arrays/base.py | 5 +++++ pandas/core/arrays/datetimelike.py | 4 ++-- pandas/core/indexes/base.py | 20 ++++++++++++++++++++ pandas/core/indexes/datetimelike.py | 9 --------- pandas/tests/arithmetic/test_datetime64.py | 15 ++++++++++++++- 6 files changed, 43 insertions(+), 14 deletions(-) diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 3d209189d97d8..fe09a044566f8 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -357,7 +357,7 @@ def reconstruct(result): return result if "out" in kwargs: - result = _dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) + result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) return reconstruct(result) # We still get here with kwargs `axis` for e.g. np.maximum.accumulate @@ -410,7 +410,7 @@ def _standardize_out_kwarg(**kwargs) -> dict: return kwargs -def _dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): +def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): """ If we have an `out` keyword, then call the ufunc without `out` and then set the result into the given `out`. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b17f309e5f9fb..46b0a6873986e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1379,6 +1379,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): if result is not NotImplemented: return result + if "out" in kwargs: + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2c9796e826825..1f42463cb9f2d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1414,7 +1414,7 @@ def __iadd__(self, other): if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self._freq = result._freq + self._freq = result.freq return self def __isub__(self, other): @@ -1423,7 +1423,7 @@ def __isub__(self, other): if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self._freq = result._freq + self._freq = result.freq return self # -------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2b49a88e27961..cb36884d201a6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -102,6 +102,7 @@ PeriodDtype, ) from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCDatetimeIndex, ABCMultiIndex, ABCPeriodIndex, @@ -116,6 +117,7 @@ ) from pandas.core import ( + arraylike, missing, ops, ) @@ -844,6 +846,24 @@ def __array__(self, dtype=None) -> np.ndarray: """ return np.asarray(self._data, dtype=dtype) + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + new_inputs = [x if x is not self else x._values for x in inputs] + result = getattr(ufunc, method)(*new_inputs, **kwargs) + if ufunc.nout == 2: + # i.e. np.divmod, np.modf, np.frexp + return tuple(self.__array_wrap__(x) for x in result) + + return self.__array_wrap__(result) + def __array_wrap__(self, result, context=None): """ Gets called after a ufunc and other functions. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 063bb4aafeb75..48171bdef24fd 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -672,15 +672,6 @@ def insert(self, loc: int, item): # -------------------------------------------------------------------- # NDArray-Like Methods - def __array_wrap__(self, result, context=None): - """ - Gets called after a ufunc and other functions. - """ - out = super().__array_wrap__(result, context=context) - if isinstance(out, DatetimeTimedeltaMixin) and self.freq is not None: - out = out._with_freq("infer") - return out - @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take((), kwargs) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 60a58b7bbea78..da93158d76525 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2163,6 +2163,15 @@ def test_dti_isub_tdi(self, tz_naive_fixture): result -= tdi tm.assert_index_equal(result, expected) + # DTA.__isub__ + dta = dti._data.copy() + dta -= tdi + tm.assert_datetime_array_equal(dta, expected._data) + + out = dti._data.copy() + np.subtract(out, tdi, out=out) + tm.assert_datetime_array_equal(out, expected._data) + msg = "cannot subtract .* from a TimedeltaArray" with pytest.raises(TypeError, match=msg): tdi -= dti @@ -2172,10 +2181,14 @@ def test_dti_isub_tdi(self, tz_naive_fixture): result -= tdi.values tm.assert_index_equal(result, expected) - msg = "cannot subtract a datelike from a TimedeltaArray" + msg = "cannot subtract DatetimeArray from ndarray" with pytest.raises(TypeError, match=msg): tdi.values -= dti + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi._values -= dti + # ------------------------------------------------------------- # Binary Operations DatetimeIndex and datetime-like # TODO: A couple other tests belong in this section. Move them in From 406fe87d10017a999fe099a7885d960199b31d8e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 6 Oct 2021 16:11:24 -0700 Subject: [PATCH 4/7] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 22b49c35e0e68..e48108a284147 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -390,6 +390,7 @@ Datetimelike - Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`) - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`) - Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`) +- Bug in inplace inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`??`) - Timedelta From d017603824ff3b63866233fd23b57c4d70bf7730 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 6 Oct 2021 16:12:46 -0700 Subject: [PATCH 5/7] GH ref --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/arithmetic/test_datetime64.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index e48108a284147..40784cb6b5d5c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -390,7 +390,7 @@ Datetimelike - Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`) - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`) - Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`) -- Bug in inplace inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`??`) +- Bug in inplace inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`43904`) - Timedelta diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index da93158d76525..0d3f7dcaaf65b 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2163,7 +2163,7 @@ def test_dti_isub_tdi(self, tz_naive_fixture): result -= tdi tm.assert_index_equal(result, expected) - # DTA.__isub__ + # DTA.__isub__ GH#43904 dta = dti._data.copy() dta -= tdi tm.assert_datetime_array_equal(dta, expected._data) From 72b1ef8c56335f485c8ab9726be8bd473de5d5bd Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 6 Oct 2021 18:00:59 -0700 Subject: [PATCH 6/7] mypy fixup --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cb36884d201a6..da953fe46ef1d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -846,7 +846,7 @@ def __array__(self, dtype=None) -> np.ndarray: """ return np.asarray(self._data, dtype=dtype) - def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs): return NotImplemented From caf1b223c9e2cc07f74dd60a6733efe8a6f1e7d8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Oct 2021 22:17:19 -0700 Subject: [PATCH 7/7] Update doc/source/whatsnew/v1.4.0.rst Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 40784cb6b5d5c..722d0dcc10041 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -390,7 +390,7 @@ Datetimelike - Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`) - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`) - Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`) -- Bug in inplace inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`43904`) +- Bug in inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`43904`) - Timedelta