From 1a03084c01b7efbbe8d3cf7be65e0c734269256a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2020 11:43:44 -0600 Subject: [PATCH 1/6] COMPAT: Return NotImplemented for subclassing This changes index ops to check the *type* of the argument in index ops, rather than just the dtype. This lets index subclasses take control of binary ops when they know better what the result should be. Closes https://github.com/pandas-dev/pandas/issues/31109 --- pandas/core/arrays/datetimelike.py | 24 +++++++++++++++---- pandas/tests/arithmetic/test_object.py | 33 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 70637026c278d..7f5f0652c67e0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1299,8 +1299,16 @@ def __add__(self, other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(other) elif is_object_dtype(other): - # e.g. Array/Index of DateOffset objects - result = self._addsub_object_array(other, operator.add) + from pandas import Index + + if type(other) is Index: + # e.g. Array/Index of DateOffset objects + # We deliberately check for other being exactly an Index, rather + # than a subclass, to let Index subclasses take control of binary + # operations. See https://github.com/pandas-dev/pandas/issues/31109 + result = self._addsub_object_array(other, operator.add) + else: + return NotImplemented elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] return self._add_datetime_arraylike(other) @@ -1354,8 +1362,16 @@ def __sub__(self, other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(-other) elif is_object_dtype(other): - # e.g. Array/Index of DateOffset objects - result = self._addsub_object_array(other, operator.sub) + from pandas import Index + + if type(other) is Index: + # e.g. Array/Index of DateOffset objects + # We deliberately check for other being exactly an Index, rather + # than a subclass, to let Index subclasses take control of binary + # operations. See https://github.com/pandas-dev/pandas/issues/31109 + result = self._addsub_object_array(other, operator.sub) + else: + return NotImplemented elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datetime_arraylike(other) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index c0d3c9d4977bd..5bf694e574dc2 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -1,6 +1,7 @@ # Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. # Specifically for object dtype +import datetime from decimal import Decimal import operator @@ -323,3 +324,35 @@ def test_rsub_object(self): with pytest.raises(TypeError): np.array([True, pd.Timestamp.now()]) - index + + def test_index_ops_defer_to_unknown_subclasses(self): + # https://github.com/pandas-dev/pandas/issues/31109 + class MyIndex(pd.Index): + + _calls: int + + @classmethod + def _simple_new(cls, values, name=None, dtype=None): + result = object.__new__(cls) + result._data = values + result._index_data = values + result._name = name + result._calls = 0 + + return result._reset_identity() + + def __add__(self, other): + self._calls += 1 + return self._simple_new(self._index_data + other.to_list()) + + def __radd__(self, other): + return self.__add__(other) + + values = np.array( + [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object + ) + a = MyIndex._simple_new(values) + b = pd.timedelta_range("1D", periods=2, freq="D") + result = b + a + assert isinstance(result, MyIndex) + assert a._calls == 1 From db950c9d94f9db5d5bd43f11bc830af4de3a095d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2020 11:52:28 -0600 Subject: [PATCH 2/6] fixup --- pandas/core/arrays/datetimelike.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7f5f0652c67e0..94dd86bb32b70 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1301,14 +1301,14 @@ def __add__(self, other): elif is_object_dtype(other): from pandas import Index - if type(other) is Index: - # e.g. Array/Index of DateOffset objects + if isinstance(other, Index) and type(other) is not Index: # We deliberately check for other being exactly an Index, rather # than a subclass, to let Index subclasses take control of binary # operations. See https://github.com/pandas-dev/pandas/issues/31109 - result = self._addsub_object_array(other, operator.add) - else: return NotImplemented + else: + # e.g. Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.add) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] return self._add_datetime_arraylike(other) @@ -1364,14 +1364,14 @@ def __sub__(self, other): elif is_object_dtype(other): from pandas import Index - if type(other) is Index: - # e.g. Array/Index of DateOffset objects + if isinstance(other, Index) and type(other) is not Index: # We deliberately check for other being exactly an Index, rather # than a subclass, to let Index subclasses take control of binary # operations. See https://github.com/pandas-dev/pandas/issues/31109 - result = self._addsub_object_array(other, operator.sub) - else: return NotImplemented + else: + # e.g. Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.sub) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datetime_arraylike(other) From 900dacb8618602c39a15ec015e00393ec8044fc6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2020 15:05:57 -0600 Subject: [PATCH 3/6] fixup --- pandas/core/arrays/datetimelike.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 94dd86bb32b70..70637026c278d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1299,16 +1299,8 @@ def __add__(self, other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(other) elif is_object_dtype(other): - from pandas import Index - - if isinstance(other, Index) and type(other) is not Index: - # We deliberately check for other being exactly an Index, rather - # than a subclass, to let Index subclasses take control of binary - # operations. See https://github.com/pandas-dev/pandas/issues/31109 - return NotImplemented - else: - # e.g. Array/Index of DateOffset objects - result = self._addsub_object_array(other, operator.add) + # e.g. Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.add) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] return self._add_datetime_arraylike(other) @@ -1362,16 +1354,8 @@ def __sub__(self, other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(-other) elif is_object_dtype(other): - from pandas import Index - - if isinstance(other, Index) and type(other) is not Index: - # We deliberately check for other being exactly an Index, rather - # than a subclass, to let Index subclasses take control of binary - # operations. See https://github.com/pandas-dev/pandas/issues/31109 - return NotImplemented - else: - # e.g. Array/Index of DateOffset objects - result = self._addsub_object_array(other, operator.sub) + # e.g. Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.sub) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datetime_arraylike(other) From 7d057bec1f1c5a363968425bb400e46878885c7a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 19 Jan 2020 15:10:47 -0600 Subject: [PATCH 4/6] fixup --- pandas/core/indexes/extension.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 58fcce7e59be7..97cbac1022424 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -8,7 +8,11 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly -from pandas.core.dtypes.common import ensure_platform_int, is_dtype_equal +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_dtype_equal, + is_object_dtype, +) from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import ExtensionArray @@ -110,6 +114,15 @@ def wrapper(self, other): def make_wrapped_arith_op(opname): def method(self, other): + if ( + isinstance(other, Index) + and is_object_dtype(other.dtype) + and type(other) is not Index + ): + # We return NotImplemented for object-dtype index *subclasses* so they have + # a chance to implement ops before we unwrap them. + # See https://github.com/pandas-dev/pandas/issues/31109 + return NotImplemented meth = getattr(self._data, opname) result = meth(_maybe_unwrap_index(other)) return _wrap_arithmetic_op(self, other, result) From ec512078726755f7c0735fd37c0e53fb8ae6248b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 20 Jan 2020 15:14:36 -0600 Subject: [PATCH 5/6] added xfails --- pandas/tests/arithmetic/test_object.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 5bf694e574dc2..a28565add6cf4 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -325,7 +325,23 @@ def test_rsub_object(self): with pytest.raises(TypeError): np.array([True, pd.Timestamp.now()]) - index - def test_index_ops_defer_to_unknown_subclasses(self): + @pytest.mark.parametrize( + "other", + [ + [datetime.timedelta(1), datetime.timedelta(2)], + [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)], + [pd.Period("2000"), pd.Period("2001")], + pytest.param( + [0, 1], marks=pytest.mark.xfail(reason="Expected is unknown.") + ), + pytest.param( + [0.0, 1.0], marks=pytest.mark.xfail(reason="Expected is unknown.") + ), + ["a", "b"], + ], + ids=["timedelta", "datetime", "period", "int", "float", "object"], + ) + def test_index_ops_defer_to_unknown_subclasses(self, other): # https://github.com/pandas-dev/pandas/issues/31109 class MyIndex(pd.Index): @@ -343,7 +359,7 @@ def _simple_new(cls, values, name=None, dtype=None): def __add__(self, other): self._calls += 1 - return self._simple_new(self._index_data + other.to_list()) + return self._simple_new(self._index_data) def __radd__(self, other): return self.__add__(other) @@ -352,7 +368,7 @@ def __radd__(self, other): [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object ) a = MyIndex._simple_new(values) - b = pd.timedelta_range("1D", periods=2, freq="D") - result = b + a + other = pd.Index(other) + result = other + a assert isinstance(result, MyIndex) assert a._calls == 1 From a3b5c7ad5fa5af033c07c31adec9be88f6c10d7f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 24 Jan 2020 09:24:59 -0600 Subject: [PATCH 6/6] object --- pandas/tests/arithmetic/test_object.py | 89 +++++++++++++------------- 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index a28565add6cf4..a729715f11b2a 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -325,50 +325,47 @@ def test_rsub_object(self): with pytest.raises(TypeError): np.array([True, pd.Timestamp.now()]) - index - @pytest.mark.parametrize( - "other", - [ - [datetime.timedelta(1), datetime.timedelta(2)], - [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)], - [pd.Period("2000"), pd.Period("2001")], - pytest.param( - [0, 1], marks=pytest.mark.xfail(reason="Expected is unknown.") - ), - pytest.param( - [0.0, 1.0], marks=pytest.mark.xfail(reason="Expected is unknown.") - ), - ["a", "b"], - ], - ids=["timedelta", "datetime", "period", "int", "float", "object"], - ) - def test_index_ops_defer_to_unknown_subclasses(self, other): - # https://github.com/pandas-dev/pandas/issues/31109 - class MyIndex(pd.Index): - - _calls: int - - @classmethod - def _simple_new(cls, values, name=None, dtype=None): - result = object.__new__(cls) - result._data = values - result._index_data = values - result._name = name - result._calls = 0 - return result._reset_identity() - - def __add__(self, other): - self._calls += 1 - return self._simple_new(self._index_data) - - def __radd__(self, other): - return self.__add__(other) - - values = np.array( - [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object - ) - a = MyIndex._simple_new(values) - other = pd.Index(other) - result = other + a - assert isinstance(result, MyIndex) - assert a._calls == 1 +class MyIndex(pd.Index): + # Simple index subclass that tracks ops calls. + + _calls: int + + @classmethod + def _simple_new(cls, values, name=None, dtype=None): + result = object.__new__(cls) + result._data = values + result._index_data = values + result._name = name + result._calls = 0 + + return result._reset_identity() + + def __add__(self, other): + self._calls += 1 + return self._simple_new(self._index_data) + + def __radd__(self, other): + return self.__add__(other) + + +@pytest.mark.parametrize( + "other", + [ + [datetime.timedelta(1), datetime.timedelta(2)], + [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)], + [pd.Period("2000"), pd.Period("2001")], + ["a", "b"], + ], + ids=["timedelta", "datetime", "period", "object"], +) +def test_index_ops_defer_to_unknown_subclasses(other): + # https://github.com/pandas-dev/pandas/issues/31109 + values = np.array( + [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object + ) + a = MyIndex._simple_new(values) + other = pd.Index(other) + result = other + a + assert isinstance(result, MyIndex) + assert a._calls == 1