diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst
index 273686f0aaa8f..519a3ea711f54 100644
--- a/doc/source/whatsnew/v1.3.4.rst
+++ b/doc/source/whatsnew/v1.3.4.rst
@@ -14,7 +14,7 @@ including other versions of pandas.
 
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
--
+- Fixed regression in :meth:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index fd30dea23e1a4..55d8dfa94f89e 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -36,6 +36,7 @@
     Substitution,
 )
 
+from pandas.core.dtypes.cast import find_common_type
 from pandas.core.dtypes.common import (
     ensure_float64,
     ensure_int64,
@@ -911,7 +912,7 @@ def _maybe_add_join_keys(
                     result_dtype = lvals.dtype
                 else:
                     key_col = Index(lvals).where(~mask_left, rvals)
-                    result_dtype = lvals.dtype
+                    result_dtype = find_common_type([lvals.dtype, rvals.dtype])
 
                 if result._is_label_reference(name):
                     result[name] = Series(
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 52a0fd9ed81ca..51d7b6ae1af2a 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2542,3 +2542,32 @@ def test_mergeerror_on_left_index_mismatched_dtypes():
     df_2 = DataFrame(data=["X"], columns=["C"], index=[999])
     with pytest.raises(MergeError, match="Can only pass argument"):
         merge(df_1, df_2, on=["C"], left_index=True)
+
+
+@pytest.mark.parametrize("dtype", [None, "Int64"])
+def test_merge_outer_with_NaN(dtype):
+    # GH#43550
+    left = DataFrame({"key": [1, 2], "col1": [1, 2]}, dtype=dtype)
+    right = DataFrame({"key": [np.nan, np.nan], "col2": [3, 4]}, dtype=dtype)
+    result = merge(left, right, on="key", how="outer")
+    expected = DataFrame(
+        {
+            "key": [1, 2, np.nan, np.nan],
+            "col1": [1, 2, np.nan, np.nan],
+            "col2": [np.nan, np.nan, 3, 4],
+        },
+        dtype=dtype,
+    )
+    tm.assert_frame_equal(result, expected)
+
+    # switch left and right
+    result = merge(right, left, on="key", how="outer")
+    expected = DataFrame(
+        {
+            "key": [np.nan, np.nan, 1, 2],
+            "col2": [3, 4, np.nan, np.nan],
+            "col1": [np.nan, np.nan, 1, 2],
+        },
+        dtype=dtype,
+    )
+    tm.assert_frame_equal(result, expected)