From 36c8b885373e13ef98545bc7a638936ee8c1a297 Mon Sep 17 00:00:00 2001 From: Dave hughes <2043653+dwhu@users.noreply.github.com> Date: Fri, 3 Jan 2020 09:23:37 -0800 Subject: [PATCH 01/15] ENH: .equals for Extension Arrays --- pandas/core/arrays/base.py | 50 ++++++++++++++++++++++++++++ pandas/core/arrays/boolean.py | 14 ++++++++ pandas/core/arrays/categorical.py | 13 ++++++++ pandas/core/arrays/integer.py | 13 ++++++++ pandas/core/arrays/interval.py | 15 +++++++++ pandas/tests/extension/json/array.py | 10 ++++++ 6 files changed, 115 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 96a4eb1b3bf32..edbf1e62f89fc 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -78,6 +78,7 @@ class ExtensionArray: dropna factorize fillna + equals isna ravel repeat @@ -350,6 +351,38 @@ def __iter__(self): for i in range(len(self)): yield self[i] + def __eq__(self, other: ABCExtensionArray) -> bool: + """ + Whether the two arrays are equivalent. + + Parameters + ---------- + other: ExtensionArray + The array to compare to this array. + + Returns + ------- + bool + """ + + raise AbstractMethodError(self) + + def __ne__(self, other: ABCExtensionArray) -> bool: + """ + Whether the two arrays are not equivalent. + + Parameters + ---------- + other: ExtensionArray + The array to compare to this array. + + Returns + ------- + bool + """ + + raise AbstractMethodError(self) + # ------------------------------------------------------------------------ # Required attributes # ------------------------------------------------------------------------ @@ -657,6 +690,23 @@ def searchsorted(self, value, side="left", sorter=None): arr = self.astype(object) return arr.searchsorted(value, side=side, sorter=sorter) + def equals(self, other: ABCExtensionArray) -> bool: + """ + Return if another array is equivalent to this array. + + Parameters + ---------- + other: ExtensionArray + Array to compare to this Array. + + Returns + ------- + boolean + Whether the arrays are equivalent. + + """ + return ((self == other) | (self.isna() == other.isna())).all() + def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: """ Return an array and missing value suitable for factorization. diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 102150b1cbce1..762bedbd67a43 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -327,6 +327,20 @@ def __getitem__(self, item): return type(self)(self._data[item], self._mask[item]) + def __eq__(self, other): + if not isinstance(other, BooleanArray): + return NotImplemented + return ( + hasattr(other, "_data") + and self._data == other._data + and hasattr(other, "_mask") + and self._mask == other._mask + and hasattr(other, "_dtype") & self._dtype == other._dtype + ) + + def __ne__(self, other): + return not self.__eq__(other) + def _coerce_to_ndarray(self, dtype=None, na_value: "Scalar" = libmissing.NA): """ Coerce to an ndarray of object dtype or bool dtype (if force_bool=True). diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f4e75364ae932..6b8260076f70a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2071,6 +2071,19 @@ def __setitem__(self, key, value): lindexer = self._maybe_coerce_indexer(lindexer) self._codes[key] = lindexer + def __eq__(self, other): + if not isinstance(other, Categorical): + return NotImplemented + return ( + hasattr(other, "_codes") + and self._codes == other._codes + and hasattr(other, "_dtype") + and self._dtype == other._dtype + ) + + def __ne__(self, other): + return not self.__eq__(other) + def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: """ Compute the inverse of a categorical, returning diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 0922f4ac6f71d..e0306a4c4eec9 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -376,6 +376,19 @@ def __getitem__(self, item): return type(self)(self._data[item], self._mask[item]) + def __eq__(self, other): + if not isinstance(other, IntegerArray): + return NotImplemented + return ( + hasattr(other, "_data") + and self._data == other._data + and hasattr(other, "_mask") + and self._mask == other._mask + ) + + def __ne__(self, other): + return not self.__eq__(other) + def _coerce_to_ndarray(self, dtype=None, na_value=lib._no_default): """ coerce to an ndarary of object dtype diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index cea059fb22be1..e839ba7e23e77 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -547,6 +547,21 @@ def __setitem__(self, key, value): right.values[key] = value_right self._right = right + def __eq__(self, other): + if not isinstance(other, IntervalArray): + return NotImplementedError + return ( + hasattr(other, "_left") + and self._left == other._left + and hasattr(other, "_right") + and self._right == other._right + and hasattr(other, "_closed") + and self._closed == other._closed + ) + + def __ne__(self, other): + return not self.__eq__(other) + def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 17bc2773aad19..89e55e1b0e7cd 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -110,6 +110,16 @@ def __setitem__(self, key, value): assert isinstance(v, self.dtype.type) self.data[k] = v + def __eq__(self, other): + return ( + isinstance(other, JSONArray) + and hasattr(other, "data") + and self.data == other.data + ) + + def __ne__(self, other): + return not self.__eq__(other) + def __len__(self) -> int: return len(self.data) From 786963c756780285268f94e66b761a3c3f0e3f78 Mon Sep 17 00:00:00 2001 From: Dave hughes <2043653+dwhu@users.noreply.github.com> Date: Fri, 3 Jan 2020 11:18:01 -0800 Subject: [PATCH 02/15] ENH: Updating eq and ne methods for extension arrays. Adding __eq__ to ExtensionArray Abstract method doc string. Adding ne implementation to EA base class. Also removing other implementations. Updating EA equals method and adding tests. GH-27081 --- pandas/core/arrays/base.py | 7 +++++-- pandas/core/arrays/boolean.py | 14 -------------- pandas/core/arrays/categorical.py | 13 ------------- pandas/core/arrays/integer.py | 8 ++------ pandas/core/arrays/interval.py | 12 ++++-------- pandas/tests/extension/base/methods.py | 15 +++++++++++++++ pandas/tests/extension/base/ops.py | 12 ------------ pandas/tests/extension/json/array.py | 3 --- pandas/tests/extension/test_sparse.py | 4 ++++ 9 files changed, 30 insertions(+), 58 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index edbf1e62f89fc..6aabd6df24e32 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -105,6 +105,7 @@ class ExtensionArray: * _from_sequence * _from_factorized * __getitem__ + * __eq__ * __len__ * dtype * nbytes @@ -381,7 +382,7 @@ def __ne__(self, other: ABCExtensionArray) -> bool: bool """ - raise AbstractMethodError(self) + return ~(self == other) # ------------------------------------------------------------------------ # Required attributes @@ -705,7 +706,9 @@ def equals(self, other: ABCExtensionArray) -> bool: Whether the arrays are equivalent. """ - return ((self == other) | (self.isna() == other.isna())).all() + return isinstance(other, self.__class__) and ( + ((self == other) | (self.isna() == other.isna())).all() + ) def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 762bedbd67a43..102150b1cbce1 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -327,20 +327,6 @@ def __getitem__(self, item): return type(self)(self._data[item], self._mask[item]) - def __eq__(self, other): - if not isinstance(other, BooleanArray): - return NotImplemented - return ( - hasattr(other, "_data") - and self._data == other._data - and hasattr(other, "_mask") - and self._mask == other._mask - and hasattr(other, "_dtype") & self._dtype == other._dtype - ) - - def __ne__(self, other): - return not self.__eq__(other) - def _coerce_to_ndarray(self, dtype=None, na_value: "Scalar" = libmissing.NA): """ Coerce to an ndarray of object dtype or bool dtype (if force_bool=True). diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6b8260076f70a..f4e75364ae932 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2071,19 +2071,6 @@ def __setitem__(self, key, value): lindexer = self._maybe_coerce_indexer(lindexer) self._codes[key] = lindexer - def __eq__(self, other): - if not isinstance(other, Categorical): - return NotImplemented - return ( - hasattr(other, "_codes") - and self._codes == other._codes - and hasattr(other, "_dtype") - and self._dtype == other._dtype - ) - - def __ne__(self, other): - return not self.__eq__(other) - def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: """ Compute the inverse of a categorical, returning diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e0306a4c4eec9..4b0d4d65924cd 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -377,18 +377,14 @@ def __getitem__(self, item): return type(self)(self._data[item], self._mask[item]) def __eq__(self, other): - if not isinstance(other, IntegerArray): - return NotImplemented return ( - hasattr(other, "_data") + isinstance(other, IntegerArray) + and hasattr(other, "_data") and self._data == other._data and hasattr(other, "_mask") and self._mask == other._mask ) - def __ne__(self, other): - return not self.__eq__(other) - def _coerce_to_ndarray(self, dtype=None, na_value=lib._no_default): """ coerce to an ndarary of object dtype diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index e839ba7e23e77..430c04e00fac8 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -548,20 +548,16 @@ def __setitem__(self, key, value): self._right = right def __eq__(self, other): - if not isinstance(other, IntervalArray): - return NotImplementedError return ( - hasattr(other, "_left") - and self._left == other._left + isinstance(other, IntervalArray) + and hasattr(other, "_left") + and np.array_equal(self._left, other._left) and hasattr(other, "_right") - and self._right == other._right + and np.array_equal(self._right, other._right) and hasattr(other, "_closed") and self._closed == other._closed ) - def __ne__(self, other): - return not self.__eq__(other) - def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 973088cb72e7a..6b2144563a74e 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -358,3 +358,18 @@ def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): np.repeat(data, repeats, **kwargs) else: data.repeat(repeats, **kwargs) + + def test_equals(self, data, na_value): + cls = type(data) + ser = pd.Series(cls._from_sequence(data, dtype=data.dtype)) + na_ser = pd.Series(cls._from_sequence([na_value], dtype=data.dtype)) + + assert data.equals(data) + assert ser.equals(ser) + assert na_ser.equals(na_ser) + + assert not data.equals(na_value) + assert not na_ser.equals(ser) + assert not ser.equals(na_ser) + assert not ser.equals(0) + assert not na_ser.equals(0) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 20d06ef2e5647..0ba5795ea891d 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -132,10 +132,8 @@ class BaseComparisonOpsTests(BaseOpsUtil): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) if op_name == "__eq__": - assert getattr(data, op_name)(other) is NotImplemented assert not op(s, other).all() elif op_name == "__ne__": - assert getattr(data, op_name)(other) is NotImplemented assert op(s, other).all() else: @@ -158,13 +156,3 @@ def test_compare_array(self, data, all_compare_operators): s = pd.Series(data) other = pd.Series([data[0]] * len(data)) self._compare_other(s, data, op_name, other) - - def test_direct_arith_with_series_returns_not_implemented(self, data): - # EAs should return NotImplemented for ops with Series. - # Pandas takes care of unboxing the series and calling the EA's op. - other = pd.Series(data) - if hasattr(data, "__eq__"): - result = data.__eq__(other) - assert result is NotImplemented - else: - raise pytest.skip(f"{type(data).__name__} does not implement __eq__") diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 89e55e1b0e7cd..7c22667e449ff 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -117,9 +117,6 @@ def __eq__(self, other): and self.data == other.data ) - def __ne__(self, other): - return not self.__eq__(other) - def __len__(self) -> int: return len(self.data) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 6ebe71e173ec2..3fe6335ec63f6 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -302,6 +302,10 @@ def test_searchsorted(self, data_for_sorting, as_series): with tm.assert_produces_warning(PerformanceWarning): super().test_searchsorted(data_for_sorting, as_series) + def test_equals(self, data, na_value): + self._check_unsupported(data) + super().test_equals(data, na_value) + class TestCasting(BaseSparseTests, base.BaseCastingTests): pass From 6800315753deb7166d68faf511520c61650b9822 Mon Sep 17 00:00:00 2001 From: Dave hughes <2043653+dwhu@users.noreply.github.com> Date: Fri, 3 Jan 2020 13:50:46 -0800 Subject: [PATCH 03/15] Removing interval.py's __eq__ implementation due to conflict with @jschendel change. --- pandas/core/arrays/interval.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 430c04e00fac8..cea059fb22be1 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -547,17 +547,6 @@ def __setitem__(self, key, value): right.values[key] = value_right self._right = right - def __eq__(self, other): - return ( - isinstance(other, IntervalArray) - and hasattr(other, "_left") - and np.array_equal(self._left, other._left) - and hasattr(other, "_right") - and np.array_equal(self._right, other._right) - and hasattr(other, "_closed") - and self._closed == other._closed - ) - def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. From a3e7b7fd6aa4ea32021fed9d2e3bc308d2b997f1 Mon Sep 17 00:00:00 2001 From: Dave hughes <2043653+dwhu@users.noreply.github.com> Date: Fri, 3 Jan 2020 14:26:59 -0800 Subject: [PATCH 04/15] ENH: Making EA eq and ne typed as Any. GH-27081 --- pandas/core/arrays/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6aabd6df24e32..bb6d0ddb70789 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -352,7 +352,7 @@ def __iter__(self): for i in range(len(self)): yield self[i] - def __eq__(self, other: ABCExtensionArray) -> bool: + def __eq__(self, other: Any) -> bool: """ Whether the two arrays are equivalent. @@ -368,7 +368,7 @@ def __eq__(self, other: ABCExtensionArray) -> bool: raise AbstractMethodError(self) - def __ne__(self, other: ABCExtensionArray) -> bool: + def __ne__(self, other: Any) -> bool: """ Whether the two arrays are not equivalent. From 860013f100de5da09de7d0897b0307b02abac0fb Mon Sep 17 00:00:00 2001 From: Dave hughes <2043653+dwhu@users.noreply.github.com> Date: Fri, 3 Jan 2020 15:31:04 -0800 Subject: [PATCH 05/15] ENH: Adding default implementation to ExtensionArray equals() and tests. GH-27081 --- pandas/core/arrays/base.py | 21 +++++++++++++-------- pandas/core/arrays/integer.py | 9 --------- pandas/tests/extension/base/methods.py | 3 +++ pandas/tests/extension/json/array.py | 7 ------- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index bb6d0ddb70789..4c7d20386cb19 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -105,7 +105,6 @@ class ExtensionArray: * _from_sequence * _from_factorized * __getitem__ - * __eq__ * __len__ * dtype * nbytes @@ -358,15 +357,19 @@ def __eq__(self, other: Any) -> bool: Parameters ---------- - other: ExtensionArray - The array to compare to this array. + other: Any + The object to compare to this array. Returns ------- bool """ - raise AbstractMethodError(self) + return ( + type(self) == type(other) + and (self.isna() == other.isna()).all() + and np.all(np.array(self) == np.array(other)) + ) def __ne__(self, other: Any) -> bool: """ @@ -374,8 +377,8 @@ def __ne__(self, other: Any) -> bool: Parameters ---------- - other: ExtensionArray - The array to compare to this array. + other: Any + The object to compare to this array. Returns ------- @@ -706,8 +709,10 @@ def equals(self, other: ABCExtensionArray) -> bool: Whether the arrays are equivalent. """ - return isinstance(other, self.__class__) and ( - ((self == other) | (self.isna() == other.isna())).all() + return ( + type(self) == type(other) + and (((self == other) | (self.isna() == other.isna())).all()) + and len(self) == len(other) ) def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4b0d4d65924cd..0922f4ac6f71d 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -376,15 +376,6 @@ def __getitem__(self, item): return type(self)(self._data[item], self._mask[item]) - def __eq__(self, other): - return ( - isinstance(other, IntegerArray) - and hasattr(other, "_data") - and self._data == other._data - and hasattr(other, "_mask") - and self._mask == other._mask - ) - def _coerce_to_ndarray(self, dtype=None, na_value=lib._no_default): """ coerce to an ndarary of object dtype diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 6b2144563a74e..7657b2f787cf8 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -362,6 +362,7 @@ def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): def test_equals(self, data, na_value): cls = type(data) ser = pd.Series(cls._from_sequence(data, dtype=data.dtype)) + smaller_ser = pd.Series(cls._from_sequence(data[:5], dtype=data.dtype)) na_ser = pd.Series(cls._from_sequence([na_value], dtype=data.dtype)) assert data.equals(data) @@ -371,5 +372,7 @@ def test_equals(self, data, na_value): assert not data.equals(na_value) assert not na_ser.equals(ser) assert not ser.equals(na_ser) + assert not ser.equals(smaller_ser) + assert not ser.equals(np.asarray(data)) assert not ser.equals(0) assert not na_ser.equals(0) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 7c22667e449ff..17bc2773aad19 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -110,13 +110,6 @@ def __setitem__(self, key, value): assert isinstance(v, self.dtype.type) self.data[k] = v - def __eq__(self, other): - return ( - isinstance(other, JSONArray) - and hasattr(other, "data") - and self.data == other.data - ) - def __len__(self) -> int: return len(self.data) From c5027dd0c75bf0d1b364a129440c02bddbc84ffa Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 1 May 2020 17:39:25 +0200 Subject: [PATCH 06/15] correct __eq/ne__ to be element-wise --- pandas/core/arrays/base.py | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index f403bb751d7d9..fe6b49f7202b3 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -85,6 +85,7 @@ class ExtensionArray: * _from_factorized * __getitem__ * __len__ + * __eq__ * dtype * nbytes * isna @@ -334,40 +335,16 @@ def __iter__(self): for i in range(len(self)): yield self[i] - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: Any) -> Union[np.ndarray, "ExtensionArray"]: """ - Whether the two arrays are equivalent. - - Parameters - ---------- - other: Any - The object to compare to this array. - - Returns - ------- - bool + Return for `self == other` (element-wise equality). """ + raise AbstractMethodError(self) - return ( - type(self) == type(other) - and (self.isna() == other.isna()).all() - and np.all(np.array(self) == np.array(other)) - ) - - def __ne__(self, other: Any) -> bool: + def __ne__(self, other: Any) -> Union[np.ndarray, "ExtensionArray"]: """ - Whether the two arrays are not equivalent. - - Parameters - ---------- - other: Any - The object to compare to this array. - - Returns - ------- - bool + Return for `self != other` (element-wise in-equality). """ - return ~(self == other) def to_numpy( @@ -719,7 +696,7 @@ def searchsorted(self, value, side="left", sorter=None): arr = self.astype(object) return arr.searchsorted(value, side=side, sorter=sorter) - def equals(self, other: ABCExtensionArray) -> bool: + def equals(self, other: "ExtensionArray") -> bool: """ Return if another array is equivalent to this array. From 375664c0907ae698cda9ef22689bee401bd7f1fb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 1 May 2020 18:08:43 +0200 Subject: [PATCH 07/15] fix equals implementation (& instead of ==) --- pandas/core/arrays/base.py | 22 ++++++---- pandas/tests/extension/base/methods.py | 51 ++++++++++++++++-------- pandas/tests/extension/json/test_json.py | 4 ++ pandas/tests/extension/test_numpy.py | 5 +++ 4 files changed, 58 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fe6b49f7202b3..6620357402434 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -711,11 +711,17 @@ def equals(self, other: "ExtensionArray") -> bool: Whether the arrays are equivalent. """ - return ( - type(self) == type(other) - and (((self == other) | (self.isna() == other.isna())).all()) - and len(self) == len(other) - ) + if not type(self) == type(other): + return False + elif not len(self) == len(other): + return False + else: + equal_values = self == other + if isinstance(equal_values, ExtensionArray): + # boolean array with NA -> fill with False + equal_values = equal_values.fillna(False) + equal_na = self.isna() & other.isna() + return (equal_values | equal_na).all().item() def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: """ @@ -1164,7 +1170,7 @@ class ExtensionScalarOpsMixin(ExtensionOpsMixin): """ @classmethod - def _create_method(cls, op, coerce_to_dtype=True): + def _create_method(cls, op, coerce_to_dtype=True, result_dtype=None): """ A class method that returns a method that will correspond to an operator for an ExtensionArray subclass, by dispatching to the @@ -1232,7 +1238,7 @@ def _maybe_convert(arr): # exception raised in _from_sequence; ensure we have ndarray res = np.asarray(arr) else: - res = np.asarray(arr) + res = np.asarray(arr, dtype=result_dtype) return res if op.__name__ in {"divmod", "rdivmod"}: @@ -1250,4 +1256,4 @@ def _create_arithmetic_method(cls, op): @classmethod def _create_comparison_method(cls, op): - return cls._create_method(op, coerce_to_dtype=False) + return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e19dd9db2c7d5..04290319bc592 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -423,19 +423,38 @@ def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): data.repeat(repeats, **kwargs) def test_equals(self, data, na_value): - cls = type(data) - ser = pd.Series(cls._from_sequence(data, dtype=data.dtype)) - smaller_ser = pd.Series(cls._from_sequence(data[:5], dtype=data.dtype)) - na_ser = pd.Series(cls._from_sequence([na_value], dtype=data.dtype)) - - assert data.equals(data) - assert ser.equals(ser) - assert na_ser.equals(na_ser) - - assert not data.equals(na_value) - assert not na_ser.equals(ser) - assert not ser.equals(na_ser) - assert not ser.equals(smaller_ser) - assert not ser.equals(np.asarray(data)) - assert not ser.equals(0) - assert not na_ser.equals(0) + data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype) + data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype) + + assert data.equals(data) is True + assert data.equals(data.copy()) is True + + # other data + assert data.equals(data2) is False + assert data.equals(data_na) is False + + # different length + assert data[:2].equals(data[:3]) is False + + # emtpy are equal + assert data[:0].equals(data[:0]) is True + + # other types + assert data.equals(None) is False + assert data[[0]].equals(data[0]) is False + + # TODO test series + # ser = pd.Series(data) + # smaller_ser = pd.Series(data[:5]) + # na_ser = pd.Series(type(data)._from_sequence([na_value], dtype=data.dtype)) + + # assert ser.equals(ser) + # assert na_ser.equals(na_ser) + + # assert not data.equals(na_value) + # assert not na_ser.equals(ser) + # assert not ser.equals(na_ser) + # assert not ser.equals(smaller_ser) + # assert not ser.equals(np.asarray(data)) + # assert not ser.equals(0) + # assert not na_ser.equals(0) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 745488770e09c..b2b202039b8de 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -247,6 +247,10 @@ def test_where_series(self, data, na_value): def test_searchsorted(self, data_for_sorting): super().test_searchsorted(data_for_sorting) + @pytest.mark.skip(reason="Can't compare dicts.") + def test_equals(self, data, na_value): + pass + class TestCasting(BaseJSON, base.BaseCastingTests): @pytest.mark.skip(reason="failing on np.array(self, dtype=str)") diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 1c887cc4371b6..aa2f9ea3257f4 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -261,6 +261,11 @@ def test_repeat(self, data, repeats, as_series, use_numpy): def test_diff(self, data, periods): return super().test_diff(data, periods) + @skip_nested + def test_equals(self, data, na_value): + # Fails creating with _from_sequence + super().test_equals(data, na_value) + @skip_nested class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): From b6ad2fbc9d96586fe440e6b23effb6154378b1a1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 1 May 2020 20:32:54 +0200 Subject: [PATCH 08/15] base tests --- pandas/tests/extension/base/ops.py | 10 ++++++++++ pandas/tests/extension/json/array.py | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index f9ab9cb9c28b5..e446ff8178226 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -164,6 +164,16 @@ def test_compare_array(self, data, all_compare_operators): other = pd.Series([data[0]] * len(data)) self._compare_other(s, data, op_name, other) + def test_direct_arith_with_series_returns_not_implemented(self, data): + # EAs should return NotImplemented for ops with Series. + # Pandas takes care of unboxing the series and calling the EA's op. + other = pd.Series(data) + if hasattr(data, "__eq__"): + result = data.__eq__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement __eq__") + class BaseUnaryOpsTests(BaseOpsUtil): def test_invert(self, data): diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 1f026e405dc17..94f971938b690 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -105,6 +105,12 @@ def __setitem__(self, key, value): def __len__(self) -> int: return len(self.data) + def __eq__(self, other): + return NotImplemented + + def __ne__(self, other): + return NotImplemented + def __array__(self, dtype=None): if dtype is None: dtype = object From 365362ab116a6890bf871ed7115e52c0c63094e0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 1 May 2020 20:58:24 +0200 Subject: [PATCH 09/15] ensure to dispatch Series.equals to EA.equals --- pandas/core/internals/blocks.py | 5 +++++ pandas/tests/extension/base/methods.py | 25 +++++++----------------- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/extension/test_numpy.py | 4 ++-- pandas/tests/extension/test_sparse.py | 4 ++-- 5 files changed, 17 insertions(+), 23 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index afca4ca86bd3f..306d2f5832754 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1861,6 +1861,11 @@ def where( return [self.make_block_same_class(result, placement=self.mgr_locs)] + def equals(self, other) -> bool: + if self.dtype != other.dtype or self.shape != other.shape: + return False + return self.values.equals(other.values) + def _unstack(self, unstacker, fill_value, new_placement): # ExtensionArray-safe unstack. # We override ObjectBlock._unstack, which unstacks directly on the diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 04290319bc592..0260e31786ce3 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -422,14 +422,19 @@ def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): else: data.repeat(repeats, **kwargs) - def test_equals(self, data, na_value): + def test_equals(self, data, na_value, as_series): data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype) data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype) + if as_series: + data = pd.Series(data) + data2 = pd.Series(data2) + data_na = pd.Series(data_na) + assert data.equals(data) is True assert data.equals(data.copy()) is True - # other data + # unequal other data assert data.equals(data2) is False assert data.equals(data_na) is False @@ -442,19 +447,3 @@ def test_equals(self, data, na_value): # other types assert data.equals(None) is False assert data[[0]].equals(data[0]) is False - - # TODO test series - # ser = pd.Series(data) - # smaller_ser = pd.Series(data[:5]) - # na_ser = pd.Series(type(data)._from_sequence([na_value], dtype=data.dtype)) - - # assert ser.equals(ser) - # assert na_ser.equals(na_ser) - - # assert not data.equals(na_value) - # assert not na_ser.equals(ser) - # assert not ser.equals(na_ser) - # assert not ser.equals(smaller_ser) - # assert not ser.equals(np.asarray(data)) - # assert not ser.equals(0) - # assert not na_ser.equals(0) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index b2b202039b8de..847be7515377c 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -248,7 +248,7 @@ def test_searchsorted(self, data_for_sorting): super().test_searchsorted(data_for_sorting) @pytest.mark.skip(reason="Can't compare dicts.") - def test_equals(self, data, na_value): + def test_equals(self, data, na_value, as_series): pass diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index aa2f9ea3257f4..cc959e86684e8 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -262,9 +262,9 @@ def test_diff(self, data, periods): return super().test_diff(data, periods) @skip_nested - def test_equals(self, data, na_value): + def test_equals(self, data, na_value, as_series): # Fails creating with _from_sequence - super().test_equals(data, na_value) + super().test_equals(data, na_value, as_series) @skip_nested diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index bd6b766f4bebb..8ede30a37a26a 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -316,9 +316,9 @@ def test_shift_0_periods(self, data): data._sparse_values[0] = data._sparse_values[1] assert result._sparse_values[0] != result._sparse_values[1] - def test_equals(self, data, na_value): + def test_equals(self, data, na_value, as_series): self._check_unsupported(data) - super().test_equals(data, na_value) + super().test_equals(data, na_value, as_series) class TestCasting(BaseSparseTests, base.BaseCastingTests): From 8d052adab6476b5b13cba293486f234a5a3f99e6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 2 May 2020 09:56:03 +0200 Subject: [PATCH 10/15] feedback: docs, whatsnew, dataframe test, strict dtype test --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_testing.py | 4 +++- pandas/core/arrays/base.py | 12 ++++++++++-- pandas/tests/arrays/integer/test_comparison.py | 10 ++++++++++ pandas/tests/extension/base/methods.py | 13 ++++++++----- pandas/tests/extension/test_numpy.py | 5 +++-- pandas/tests/extension/test_sparse.py | 5 +++-- 7 files changed, 38 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 4ad186d0ea6ea..cdc631b764365 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -150,6 +150,7 @@ Other enhancements such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`) - :meth:`~pandas.core.groupby.GroupBy.transform` and :meth:`~pandas.core.groupby.GroupBy.aggregate` has gained ``engine`` and ``engine_kwargs`` arguments that supports executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`) - :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`) +- The ``ExtensionArray`` class has now an ``equals`` method, similarly to ``Series.equals()`` (:issue:`27081`). - .. --------------------------------------------------------------------------- diff --git a/pandas/_testing.py b/pandas/_testing.py index eb4eb86c78b2d..1d105ebf0e1af 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -1490,7 +1490,9 @@ def box_expected(expected, box_cls, transpose=True): ------- subclass of box_cls """ - if box_cls is pd.Index: + if box_cls is pd.array: + expected = pd.array(expected) + elif box_cls is pd.Index: expected = pd.Index(expected) elif box_cls is pd.Series: expected = pd.Series(expected) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6620357402434..63e041a4dfa59 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -335,13 +335,19 @@ def __iter__(self): for i in range(len(self)): yield self[i] - def __eq__(self, other: Any) -> Union[np.ndarray, "ExtensionArray"]: + def __eq__(self, other: Any) -> ArrayLike: """ Return for `self == other` (element-wise equality). """ + # Implementer note: this should return a boolean numpy ndarray or + # a boolean ExtensionArray. + # When `other` is one of Series, Index, or DataFrame, this method should + # return NotImplemented (to ensure that those objects are responsible for + # first unpacking the arrays, and then dispatch the operation to the + # underlying arrays) raise AbstractMethodError(self) - def __ne__(self, other: Any) -> Union[np.ndarray, "ExtensionArray"]: + def __ne__(self, other: Any) -> ArrayLike: """ Return for `self != other` (element-wise in-equality). """ @@ -713,6 +719,8 @@ def equals(self, other: "ExtensionArray") -> bool: """ if not type(self) == type(other): return False + elif not self.dtype == other.dtype: + return False elif not len(self) == len(other): return False else: diff --git a/pandas/tests/arrays/integer/test_comparison.py b/pandas/tests/arrays/integer/test_comparison.py index d76ed2c21ca0e..1767250af09b0 100644 --- a/pandas/tests/arrays/integer/test_comparison.py +++ b/pandas/tests/arrays/integer/test_comparison.py @@ -104,3 +104,13 @@ def test_compare_to_int(self, any_nullable_int_dtype, all_compare_operators): expected[s2.isna()] = pd.NA self.assert_series_equal(result, expected) + + +def test_equals(): + # GH-30652 + # equals is generally tested in /tests/extension/base/methods, but this + # specifically tests that two arrays of the same class but different dtype + # do not evaluate equal + a1 = pd.array([1, 2, None], dtype="Int64") + a2 = pd.array([1, 2, None], dtype="Int32") + assert a1.equals(a2) is False diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 0260e31786ce3..4a6d827b36b02 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -422,14 +422,17 @@ def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): else: data.repeat(repeats, **kwargs) - def test_equals(self, data, na_value, as_series): + @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) + def test_equals(self, data, na_value, as_series, box): data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype) data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype) - if as_series: - data = pd.Series(data) - data2 = pd.Series(data2) - data_na = pd.Series(data_na) + data = tm.box_expected(data, box, transpose=False) + data2 = tm.box_expected(data2, box, transpose=False) + data_na = tm.box_expected(data_na, box, transpose=False) + + # we are asserting with `is True/False` explicitly, to test that the + # result is an actual Python bool, and not something "truthy" assert data.equals(data) is True assert data.equals(data.copy()) is True diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index cc959e86684e8..8e2a3a4a2c784 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -262,9 +262,10 @@ def test_diff(self, data, periods): return super().test_diff(data, periods) @skip_nested - def test_equals(self, data, na_value, as_series): + @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) + def test_equals(self, data, na_value, as_series, box): # Fails creating with _from_sequence - super().test_equals(data, na_value, as_series) + super().test_equals(data, na_value, as_series, box) @skip_nested diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 8ede30a37a26a..e59b3f0600867 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -316,9 +316,10 @@ def test_shift_0_periods(self, data): data._sparse_values[0] = data._sparse_values[1] assert result._sparse_values[0] != result._sparse_values[1] - def test_equals(self, data, na_value, as_series): + @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) + def test_equals(self, data, na_value, as_series, box): self._check_unsupported(data) - super().test_equals(data, na_value, as_series) + super().test_equals(data, na_value, as_series, box) class TestCasting(BaseSparseTests, base.BaseCastingTests): From 9ee034e4794dbf29410d34a1d2c3df8858cdb990 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 2 May 2020 09:58:36 +0200 Subject: [PATCH 11/15] add to reference docs --- doc/source/reference/extensions.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 4c0763e091b75..fe4113d100abf 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -45,6 +45,7 @@ objects. api.extensions.ExtensionArray.copy api.extensions.ExtensionArray.view api.extensions.ExtensionArray.dropna + api.extensions.ExtensionArray.equals api.extensions.ExtensionArray.factorize api.extensions.ExtensionArray.fillna api.extensions.ExtensionArray.isna From 38501e60c0c2ca159fc868742fcdc8ed01ec4443 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 2 May 2020 10:12:54 +0200 Subject: [PATCH 12/15] remove IntervalArray.__ne__ --- pandas/core/arrays/interval.py | 3 --- pandas/tests/extension/base/ops.py | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 66faca29670cb..8cac909b70802 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -606,9 +606,6 @@ def __eq__(self, other): return result - def __ne__(self, other): - return ~self.__eq__(other) - def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index e446ff8178226..188893c8b067c 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -174,6 +174,12 @@ def test_direct_arith_with_series_returns_not_implemented(self, data): else: raise pytest.skip(f"{type(data).__name__} does not implement __eq__") + if hasattr(data, "__ne__"): + result = data.__ne__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement __ne__") + class BaseUnaryOpsTests(BaseOpsUtil): def test_invert(self, data): From dccec7fcca1c3038e8a0211c443adcdfd3812332 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 5 May 2020 13:49:11 +0200 Subject: [PATCH 13/15] type ignore following mypy issue (mypy/2783) --- pandas/core/arrays/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 63e041a4dfa59..84e1e6187482b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -335,7 +335,7 @@ def __iter__(self): for i in range(len(self)): yield self[i] - def __eq__(self, other: Any) -> ArrayLike: + def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] # NOQA """ Return for `self == other` (element-wise equality). """ @@ -347,7 +347,7 @@ def __eq__(self, other: Any) -> ArrayLike: # underlying arrays) raise AbstractMethodError(self) - def __ne__(self, other: Any) -> ArrayLike: + def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] # NOQA """ Return for `self != other` (element-wise in-equality). """ From b8be8589c962453f5639d48002aad79c522eaf5c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 7 May 2020 22:23:13 +0200 Subject: [PATCH 14/15] try again without type: ignore --- pandas/core/arrays/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 00becfd43adc2..1761b49d77165 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -335,7 +335,7 @@ def __iter__(self): for i in range(len(self)): yield self[i] - def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] # NOQA + def __eq__(self, other: Any) -> ArrayLike: """ Return for `self == other` (element-wise equality). """ @@ -347,7 +347,7 @@ def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] # NOQA # underlying arrays) raise AbstractMethodError(self) - def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] # NOQA + def __ne__(self, other: Any) -> ArrayLike: """ Return for `self != other` (element-wise in-equality). """ From 4c7273f7c8d938668d64f27de2021ee2343858da Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 May 2020 13:10:12 +0200 Subject: [PATCH 15/15] updates --- doc/source/whatsnew/v1.1.0.rst | 3 ++- pandas/core/arrays/base.py | 7 +++++-- pandas/core/internals/blocks.py | 2 -- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 351e3c7ad4ce9..3ce0db2cf38d0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -150,7 +150,8 @@ Other enhancements such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`) - :meth:`~pandas.core.groupby.GroupBy.transform` and :meth:`~pandas.core.groupby.GroupBy.aggregate` has gained ``engine`` and ``engine_kwargs`` arguments that supports executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`) - :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`) -- The ``ExtensionArray`` class has now an ``equals`` method, similarly to ``Series.equals()`` (:issue:`27081`). +- The ``ExtensionArray`` class has now an :meth:`~pandas.arrays.ExtensionArray.equals` + method, similarly to :meth:`Series.equals` (:issue:`27081`). - .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1761b49d77165..0c5634a932e12 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -706,16 +706,19 @@ def equals(self, other: "ExtensionArray") -> bool: """ Return if another array is equivalent to this array. + Equivalent means that both arrays have the same shape and dtype, and + all values compare equal. Missing values in the same location are + considered equal (in contrast with normal equality). + Parameters ---------- - other: ExtensionArray + other : ExtensionArray Array to compare to this Array. Returns ------- boolean Whether the arrays are equivalent. - """ if not type(self) == type(other): return False diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c1e0fa7237185..d22adf2aaf179 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1865,8 +1865,6 @@ def where( return [self.make_block_same_class(result, placement=self.mgr_locs)] def equals(self, other) -> bool: - if self.dtype != other.dtype or self.shape != other.shape: - return False return self.values.equals(other.values) def _unstack(self, unstacker, fill_value, new_placement):