diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9d1e0c7485092..475583e5277a7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -218,6 +218,8 @@ Indexing - Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`) - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) - Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) +- Bug in :meth:`DataFrame.compare` does not recognize differences when comparing ``NA`` with value in nullable dtypes (:issue:`48939`) +- Missing ^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ce828ba4a0af4..ca4c5fdeee565 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9262,6 +9262,7 @@ def compare( ) mask = ~((self == other) | (self.isna() & other.isna())) + mask.fillna(True, inplace=True) if not keep_equal: self = self.where(mask) diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 55e5db9603fe5..2c47285d7c507 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -238,17 +238,51 @@ def test_invalid_input_result_names(result_names): df1.compare(df2, result_names=result_names) -def test_compare_ea_and_np_dtype(): - # GH#44014 - df1 = pd.DataFrame({"a": [4.0, 4], "b": [1.0, 2]}) - df2 = pd.DataFrame({"a": pd.Series([1, pd.NA], dtype="Int64"), "b": [1.0, 2]}) +@pytest.mark.parametrize( + "val1,val2", + [(4, pd.NA), (pd.NA, pd.NA), (pd.NA, 4)], +) +def test_compare_ea_and_np_dtype(val1, val2): + # GH 48966 + arr = [4.0, val1] + ser = pd.Series([1, val2], dtype="Int64") + + df1 = pd.DataFrame({"a": arr, "b": [1.0, 2]}) + df2 = pd.DataFrame({"a": ser, "b": [1.0, 2]}) + expected = pd.DataFrame( + { + ("a", "self"): arr, + ("a", "other"): ser, + ("b", "self"): np.nan, + ("b", "other"): np.nan, + } + ) result = df1.compare(df2, keep_shape=True) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df1_val,df2_val,diff_self,diff_other", + [ + (4, 3, 4, 3), + (4, 4, pd.NA, pd.NA), + (4, pd.NA, 4, pd.NA), + (pd.NA, pd.NA, pd.NA, pd.NA), + ], +) +def test_compare_nullable_int64_dtype(df1_val, df2_val, diff_self, diff_other): + # GH 48966 + df1 = pd.DataFrame({"a": pd.Series([df1_val, pd.NA], dtype="Int64"), "b": [1.0, 2]}) + df2 = df1.copy() + df2.loc[0, "a"] = df2_val + expected = pd.DataFrame( { - ("a", "self"): [4.0, np.nan], - ("a", "other"): pd.Series([1, pd.NA], dtype="Int64"), + ("a", "self"): pd.Series([diff_self, pd.NA], dtype="Int64"), + ("a", "other"): pd.Series([diff_other, pd.NA], dtype="Int64"), ("b", "self"): np.nan, ("b", "other"): np.nan, } ) + result = df1.compare(df2, keep_shape=True) tm.assert_frame_equal(result, expected)