From 75ba47c469f44b75158a7ead931c2d7e70a81d97 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 13 Sep 2021 22:24:38 +0530 Subject: [PATCH 1/3] REGR: Outer merge failing with integer and NaN keys --- doc/source/whatsnew/v1.3.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst index 273686f0aaa8f..519a3ea711f54 100644 --- a/doc/source/whatsnew/v1.3.4.rst +++ b/doc/source/whatsnew/v1.3.4.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`) - .. --------------------------------------------------------------------------- From 74f8fcf54a21b5a8acb41866e38518e7ff04da86 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 13 Sep 2021 22:25:47 +0530 Subject: [PATCH 2/3] REGR: Outer merge failing with integer and NaN keys --- pandas/core/reshape/merge.py | 3 ++- pandas/tests/reshape/merge/test_merge.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index bdba1249ffafe..6dc95a19d8d53 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -37,6 +37,7 @@ Substitution, ) +from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( ensure_float64, ensure_int64, @@ -912,7 +913,7 @@ def _maybe_add_join_keys( result_dtype = lvals.dtype else: key_col = Index(lvals).where(~mask_left, rvals) - result_dtype = lvals.dtype + result_dtype = find_common_type([lvals.dtype, rvals.dtype]) if result._is_label_reference(name): result[name] = Series( diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 5c07a9662359e..147575df08a31 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2571,3 +2571,18 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): df_2 = DataFrame(data=["X"], columns=["C"], index=[999]) with pytest.raises(MergeError, match="Can only pass argument"): merge(df_1, df_2, on=["C"], left_index=True) + + +def test_merge_outer_with_NaN(): + # GH#43550 + left = DataFrame({"key": [1, 2], "col1": [1, 2]}) + right = DataFrame({"key": [np.nan, np.nan], "col2": [3, 4]}) + result = merge(left, right, on="key", how="outer") + expected = DataFrame( + { + "key": [1, 2, np.nan, np.nan], + "col1": [1, 2, np.nan, np.nan], + "col2": [np.nan, np.nan, 3, 4], + } + ) + tm.assert_frame_equal(result, expected) From 3aa7c392312a82d1435d942fad3a06c7a95c6ad7 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Tue, 14 Sep 2021 10:33:35 +0530 Subject: [PATCH 3/3] changed test as per suggestion --- pandas/tests/reshape/merge/test_merge.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 147575df08a31..060c37c207a2a 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2573,16 +2573,30 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): merge(df_1, df_2, on=["C"], left_index=True) -def test_merge_outer_with_NaN(): +@pytest.mark.parametrize("dtype", [None, "Int64"]) +def test_merge_outer_with_NaN(dtype): # GH#43550 - left = DataFrame({"key": [1, 2], "col1": [1, 2]}) - right = DataFrame({"key": [np.nan, np.nan], "col2": [3, 4]}) + left = DataFrame({"key": [1, 2], "col1": [1, 2]}, dtype=dtype) + right = DataFrame({"key": [np.nan, np.nan], "col2": [3, 4]}, dtype=dtype) result = merge(left, right, on="key", how="outer") expected = DataFrame( { "key": [1, 2, np.nan, np.nan], "col1": [1, 2, np.nan, np.nan], "col2": [np.nan, np.nan, 3, 4], - } + }, + dtype=dtype, + ) + tm.assert_frame_equal(result, expected) + + # switch left and right + result = merge(right, left, on="key", how="outer") + expected = DataFrame( + { + "key": [np.nan, np.nan, 1, 2], + "col2": [3, 4, np.nan, np.nan], + "col1": [np.nan, np.nan, 1, 2], + }, + dtype=dtype, ) tm.assert_frame_equal(result, expected)