diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index f5758a079b1b5..19a8500928ab7 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -816,6 +816,7 @@ Reshaping - Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`) - Bug in :func:`merge_asof` raising ``ValueError`` for data backed by read-only ndarrays (:issue:`53513`) - Bug in :func:`merge_asof` with ``left_index=True`` or ``right_index=True`` with mismatched index dtypes giving incorrect results in some cases instead of raising ``MergeError`` (:issue:`53870`) +- Bug in :func:`merge` when merging on integer ``ExtensionDtype`` and float NumPy dtype raising ``TypeError`` (:issue:`46178`) - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`) - Bug in :meth:`DataFrame.combine_first` ignoring other's columns if ``other`` is empty (:issue:`53792`) - Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 140a3024a8684..13bc1008698b2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -53,6 +53,7 @@ ensure_object, is_bool, is_bool_dtype, + is_extension_array_dtype, is_float_dtype, is_integer, is_integer_dtype, @@ -1385,6 +1386,21 @@ def _maybe_coerce_merge_keys(self) -> None: if lk.dtype.kind == rk.dtype.kind: continue + if is_extension_array_dtype(lk.dtype) and not is_extension_array_dtype( + rk.dtype + ): + ct = find_common_type([lk.dtype, rk.dtype]) + if is_extension_array_dtype(ct): + rk = ct.construct_array_type()._from_sequence(rk) # type: ignore[union-attr] # noqa: E501 + else: + rk = rk.astype(ct) # type: ignore[arg-type] + elif is_extension_array_dtype(rk.dtype): + ct = find_common_type([lk.dtype, rk.dtype]) + if is_extension_array_dtype(ct): + lk = ct.construct_array_type()._from_sequence(lk) # type: ignore[union-attr] # noqa: E501 + else: + lk = lk.astype(ct) # type: ignore[arg-type] + # check whether ints and floats if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 50a534ad36bcc..02d7e2059e8e1 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2924,3 +2924,26 @@ def test_merge_combinations( expected = expected.reset_index(drop=True) tm.assert_frame_equal(result, expected) + + +def test_merge_ea_int_and_float_numpy(): + # GH#46178 + df1 = DataFrame([1.0, np.nan], dtype=pd.Int64Dtype()) + df2 = DataFrame([1.5]) + expected = DataFrame(columns=[0], dtype="Int64") + + with tm.assert_produces_warning(UserWarning, match="You are merging"): + result = df1.merge(df2) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(UserWarning, match="You are merging"): + result = df2.merge(df1) + tm.assert_frame_equal(result, expected.astype("float64")) + + df2 = DataFrame([1.0]) + expected = DataFrame([1], columns=[0], dtype="Int64") + result = df1.merge(df2) + tm.assert_frame_equal(result, expected) + + result = df2.merge(df1) + tm.assert_frame_equal(result, expected.astype("float64"))