From c39d527be5f3dc78e3a6bc4aed97d80bbc2a6e84 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Tue, 26 Oct 2021 20:11:19 +0200 Subject: [PATCH 1/4] add nan-case check in missing.pyx --- pandas/_libs/lib.pyx | 9 +++++++++ pandas/_libs/missing.pyx | 9 ++++++++- pandas/tests/series/methods/test_equals.py | 7 +++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index ec89e52e2eff7..e070e332ff1d4 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -620,30 +620,39 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: object x, y for i in range(n): + # print('in array_equivalent_object') x = left[i] y = right[i] # we are either not equal or both nan # I think None == None will be true here try: + # print('PyObject_RichCompareBool(x, y, Py_EQ): ' + str(PyObject_RichCompareBool(x, y, Py_EQ))) + # print('is_matching_na(x, y, nan_matches_none=True)' + str(is_matching_na(x, y, nan_matches_none=True))) if PyArray_Check(x) and PyArray_Check(y): if not array_equivalent_object(x, y): + # print('line 631, returning False, this is the index: ' + str(i)) return False elif (x is C_NA) ^ (y is C_NA): + # print('first elif') return False elif not ( PyObject_RichCompareBool(x, y, Py_EQ) or is_matching_na(x, y, nan_matches_none=True) ): + # print('second elif') return False except ValueError: + # print('raise Value Error') # Avoid raising ValueError when comparing Numpy arrays to other types if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): # Only compare scalars to scalars and non-scalars to non-scalars + # print('cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y)') return False elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) and not (isinstance(x, type(y)) or isinstance(y, type(x)))): # Check if non-scalars have the same type + # print('(not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) and not (isinstance(x, type(y)) or isinstance(y, type(x)))):') return False raise return True diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 90f409d371e6b..f02211926b3b0 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -59,16 +59,23 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False bool """ if left is None: + # print('is matching na left is None') if nan_matches_none and util.is_nan(right): return True return right is None elif left is C_NA: + # print('is matching na left is C_NA') return right is C_NA elif left is NaT: + # print('is matching na left is NaT') return right is NaT elif util.is_float_object(left): - if nan_matches_none and right is None: + # print('is matching na left is float object') + if nan_matches_none and right is None and util.is_nan(left): + # print('is matching na left is float and right is None: actually right is ' + str(right)) return True + # print('is matching na left is float and right is NOT None') + # print('str(util.is_nan(left)): ' + str(str(util.is_nan(left))) + ' util.is_float_object(right) ' + str(util.is_float_object(right)) + ' util.is_nan(right) ' + str(util.is_nan(right))) return ( util.is_nan(left) and util.is_float_object(right) diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 052aef4ac1bab..ea059973b17ee 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -125,3 +125,10 @@ def test_equals_none_vs_nan(): assert ser.equals(ser2) assert Index(ser, dtype=ser.dtype).equals(Index(ser2, dtype=ser2.dtype)) assert ser.array.equals(ser2.array) + +def test_equals_None_vs_float(): + left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object) + right = Series([None] * len(left)) + + assert not left.equals(right) + assert not right.equals(left) From f208db4b2afc63368f69bcc94bacdee701fb39e3 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Tue, 26 Oct 2021 20:15:20 +0200 Subject: [PATCH 2/4] add test for failing case --- pandas/_libs/lib.pyx | 9 --------- pandas/_libs/missing.pyx | 7 ------- pandas/tests/series/methods/test_equals.py | 1 + 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e070e332ff1d4..ec89e52e2eff7 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -620,39 +620,30 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: object x, y for i in range(n): - # print('in array_equivalent_object') x = left[i] y = right[i] # we are either not equal or both nan # I think None == None will be true here try: - # print('PyObject_RichCompareBool(x, y, Py_EQ): ' + str(PyObject_RichCompareBool(x, y, Py_EQ))) - # print('is_matching_na(x, y, nan_matches_none=True)' + str(is_matching_na(x, y, nan_matches_none=True))) if PyArray_Check(x) and PyArray_Check(y): if not array_equivalent_object(x, y): - # print('line 631, returning False, this is the index: ' + str(i)) return False elif (x is C_NA) ^ (y is C_NA): - # print('first elif') return False elif not ( PyObject_RichCompareBool(x, y, Py_EQ) or is_matching_na(x, y, nan_matches_none=True) ): - # print('second elif') return False except ValueError: - # print('raise Value Error') # Avoid raising ValueError when comparing Numpy arrays to other types if cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y): # Only compare scalars to scalars and non-scalars to non-scalars - # print('cnp.PyArray_IsAnyScalar(x) != cnp.PyArray_IsAnyScalar(y)') return False elif (not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) and not (isinstance(x, type(y)) or isinstance(y, type(x)))): # Check if non-scalars have the same type - # print('(not (cnp.PyArray_IsPythonScalar(x) or cnp.PyArray_IsPythonScalar(y)) and not (isinstance(x, type(y)) or isinstance(y, type(x)))):') return False raise return True diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index f02211926b3b0..b77db2aec4a08 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -59,23 +59,16 @@ cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False bool """ if left is None: - # print('is matching na left is None') if nan_matches_none and util.is_nan(right): return True return right is None elif left is C_NA: - # print('is matching na left is C_NA') return right is C_NA elif left is NaT: - # print('is matching na left is NaT') return right is NaT elif util.is_float_object(left): - # print('is matching na left is float object') if nan_matches_none and right is None and util.is_nan(left): - # print('is matching na left is float and right is None: actually right is ' + str(right)) return True - # print('is matching na left is float and right is NOT None') - # print('str(util.is_nan(left)): ' + str(str(util.is_nan(left))) + ' util.is_float_object(right) ' + str(util.is_float_object(right)) + ' util.is_nan(right) ' + str(util.is_nan(right))) return ( util.is_nan(left) and util.is_float_object(right) diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index ea059973b17ee..11da210ceed7b 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -126,6 +126,7 @@ def test_equals_none_vs_nan(): assert Index(ser, dtype=ser.dtype).equals(Index(ser2, dtype=ser2.dtype)) assert ser.array.equals(ser2.array) + def test_equals_None_vs_float(): left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object) right = Series([None] * len(left)) From 64566eb8f23a9eacc80462dc828f088602f3d3b6 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Thu, 28 Oct 2021 05:47:04 +0200 Subject: [PATCH 3/4] add whatsnew and adjust test case to PR discussions --- doc/source/whatsnew/v1.3.5.rst | 2 +- pandas/tests/series/methods/test_equals.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index 0f1997de2166a..a54776ca0dbfe 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`Series.equals` when comparing floats with dtype object to None (:issue:`44190`) - .. --------------------------------------------------------------------------- diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 11da210ceed7b..77690271c79de 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -128,8 +128,16 @@ def test_equals_none_vs_nan(): def test_equals_None_vs_float(): + # GH#44190 left = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object) right = Series([None] * len(left)) + # these series were found to be equal due to a bug, check that they are correctly + # found to not equal assert not left.equals(right) assert not right.equals(left) + assert not left.to_frame().equals(right.to_frame()) + assert not right.to_frame().equals(left.to_frame()) + assert not Index(left, dtype='object').equals(Index(right, dtype='object')) + assert not Index(right, dtype='object').equals(Index(left, dtype='object')) + From a1dc3042cb50e90f528cab6f1f2736c403e50898 Mon Sep 17 00:00:00 2001 From: Tobias Pitters Date: Thu, 28 Oct 2021 05:51:42 +0200 Subject: [PATCH 4/4] use black to reformat --- pandas/tests/series/methods/test_equals.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 77690271c79de..22e27c271df88 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -138,6 +138,5 @@ def test_equals_None_vs_float(): assert not right.equals(left) assert not left.to_frame().equals(right.to_frame()) assert not right.to_frame().equals(left.to_frame()) - assert not Index(left, dtype='object').equals(Index(right, dtype='object')) - assert not Index(right, dtype='object').equals(Index(left, dtype='object')) - + assert not Index(left, dtype="object").equals(Index(right, dtype="object")) + assert not Index(right, dtype="object").equals(Index(left, dtype="object"))