pandas-dev · phofl · Oct 14, 2022 · Oct 5, 2022 · Oct 6, 2022 · Oct 6, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -218,6 +218,8 @@ Indexing
 - Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
 - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`)
 - Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`)
+- Bug in :meth:`DataFrame.compare` not recognizing differences when comparing ``NA`` with a value in nullable dtypes (:issue:`48939`)
+-
 
 Missing
 ^^^^^^^

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9262,6 +9262,7 @@ def compare(
             )
 
         mask = ~((self == other) | (self.isna() & other.isna()))
+        mask.fillna(True, inplace=True)
 
         if not keep_equal:
             self = self.where(mask)

diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py
@@ -238,17 +238,50 @@ def test_invalid_input_result_names(result_names):
         df1.compare(df2, result_names=result_names)
 
 
-def test_compare_ea_and_np_dtype():
-    # GH#44014
-    df1 = pd.DataFrame({"a": [4.0, 4], "b": [1.0, 2]})
-    df2 = pd.DataFrame({"a": pd.Series([1, pd.NA], dtype="Int64"), "b": [1.0, 2]})
-    result = df1.compare(df2, keep_shape=True)
+@pytest.mark.parametrize(
+    "ea_val,np_dtype_val",
+    [(4, pd.NA), (pd.NA, pd.NA), (pd.NA, 4)],
+)
+def test_compare_ea_and_np_dtype(ea_val, np_dtype_val):
+    ea = [4.0, ea_val]
+    np_dtype = pd.Series([1, np_dtype_val], dtype="Int64")
+
+    ea_df = pd.DataFrame({"a": ea, "b": [1.0, 2]})
+    np_dtype_df = pd.DataFrame({"a": np_dtype, "b": [1.0, 2]})
+    expected = pd.DataFrame(
+        {
+            ("a", "self"): ea,
+            ("a", "other"): np_dtype,
+            ("b", "self"): np.nan,
+            ("b", "other"): np.nan,
+        }
+    )
+    result = ea_df.compare(np_dtype_df, keep_shape=True)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "df1_val,df2_val,diff_self,diff_other",
+    [
+        (4, 3, 4, 3),
+        (4, 4, pd.NA, pd.NA),
+        (4, pd.NA, 4, pd.NA),
+        (pd.NA, pd.NA, pd.NA, pd.NA),
+    ],
+)
+def test_compare_nullable_int64_dtype(df1_val, df2_val, diff_self, diff_other):
+
+    df1 = pd.DataFrame({"a": pd.Series([df1_val, pd.NA], dtype="Int64"), "b": [1.0, 2]})
+    df2 = df1.copy()
+    df2.loc[0, "a"] = df2_val
+
     expected = pd.DataFrame(
         {
-            ("a", "self"): [4.0, np.nan],
-            ("a", "other"): pd.Series([1, pd.NA], dtype="Int64"),
+            ("a", "self"): pd.Series([diff_self, pd.NA], dtype="Int64"),
+            ("a", "other"): pd.Series([diff_other, pd.NA], dtype="Int64"),
             ("b", "self"): np.nan,
             ("b", "other"): np.nan,
         }
     )
+    result = df1.compare(df2, keep_shape=True)
     tm.assert_frame_equal(result, expected)