diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 8db9be21ca4ef..466c8b21e89bf 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -692,6 +692,7 @@ Other
 - Bug in :meth:`RangeIndex.difference` with ``sort=None`` and ``step<0`` failing to sort (:issue:`44085`)
 - Bug in :meth:`Series.to_frame` and :meth:`Index.to_frame` ignoring the ``name`` argument when ``name=None`` is explicitly passed (:issue:`44212`)
 - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` with ``value=None`` and ExtensionDtypes (:issue:`44270`)
+- Bug in :meth:`FloatingArray.equals` failing to consider two arrays equal if they contain ``np.nan`` values (:issue:`44382`)
 -
 
 .. ***DO NOT USE THIS SECTION***
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index b11b11ded2f22..1797f1aff4235 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -627,6 +627,21 @@ def value_counts(self, dropna: bool = True) -> Series:
 
         return Series(counts, index=index)
 
+    @doc(ExtensionArray.equals)
+    def equals(self, other) -> bool:
+        if type(self) != type(other):
+            return False
+        if other.dtype != self.dtype:
+            return False
+
+        # GH#44382 masks must match exactly: np.nan at self[1] vs pd.NA at
+        #  other[1] is NOT equal. NaNs in the unmasked data do compare equal.
+        return np.array_equal(self._mask, other._mask) and np.array_equal(
+            self._data[~self._mask],
+            other._data[~other._mask],
+            equal_nan=True,
+        )
+
     def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         if name in {"any", "all"}:
             return getattr(self, name)(skipna=skipna, **kwargs)
diff --git a/pandas/tests/arrays/floating/test_comparison.py b/pandas/tests/arrays/floating/test_comparison.py
index c4163c25ae74d..a429649f1ce1d 100644
--- a/pandas/tests/arrays/floating/test_comparison.py
+++ b/pandas/tests/arrays/floating/test_comparison.py
@@ -1,7 +1,9 @@
+import numpy as np
 import pytest
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.core.arrays import FloatingArray
 from pandas.tests.arrays.masked_shared import (
     ComparisonOps,
     NumericOps,
@@ -34,3 +36,30 @@ def test_equals():
     a1 = pd.array([1, 2, None], dtype="Float64")
     a2 = pd.array([1, 2, None], dtype="Float32")
     assert a1.equals(a2) is False
+
+
+def test_equals_nan_vs_na():
+    # GH#44382 an unmasked np.nan must not compare equal to a masked entry
+
+    mask = np.zeros(3, dtype=bool)
+    data = np.array([1.0, np.nan, 3.0], dtype=np.float64)
+
+    left = FloatingArray(data, mask)
+    assert left.equals(left)
+    tm.assert_extension_array_equal(left, left)
+
+    assert left.equals(left.copy())
+    assert left.equals(FloatingArray(data.copy(), mask.copy()))
+
+    mask2 = np.array([False, True, False], dtype=bool)
+    data2 = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+    right = FloatingArray(data2, mask2)
+    assert right.equals(right)
+    tm.assert_extension_array_equal(right, right)
+
+    assert not left.equals(right)
+
+    # after mask[1] = True (left is expected to see this in-place change),
+    # the only difference is the masked data[1], which equals must ignore
+    mask[1] = True
+    assert left.equals(right)