From 3c7261db44a6961c601db3ea92770ba93aa76fa6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 25 Aug 2023 16:28:59 +0200 Subject: [PATCH 1/2] BUG: merge raising for ea int and numpy float --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/reshape/merge.py | 10 ++++++++++ pandas/tests/reshape/merge/test_merge.py | 23 +++++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index fabe910261c3d..cf1cfc9f0aa42 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -815,6 +815,7 @@ Reshaping - Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`) - Bug in :func:`merge_asof` raising ``ValueError`` for data backed by read-only ndarrays (:issue:`53513`) - Bug in :func:`merge_asof` with ``left_index=True`` or ``right_index=True`` with mismatched index dtypes giving incorrect results in some cases instead of raising ``MergeError`` (:issue:`53870`) +- Bug in :func:`merge` when merging on integer ``ExtensionDtype`` and float NumPy dtype raising ``TypeError`` (:issue:`46178`) - Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`) - Bug in :meth:`DataFrame.combine_first` ignoring other's columns if ``other`` is empty (:issue:`53792`) - Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 140a3024a8684..a936903ff3ffa 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -53,6 +53,7 @@ ensure_object, is_bool, is_bool_dtype, + is_extension_array_dtype, is_float_dtype, is_integer, is_integer_dtype, @@ -1385,6 +1386,15 @@ def _maybe_coerce_merge_keys(self) -> None: if lk.dtype.kind == rk.dtype.kind: continue + if is_extension_array_dtype(lk.dtype) and not is_extension_array_dtype( + rk.dtype + ): + ct = find_common_type([lk.dtype, rk.dtype]) + rk = ct.construct_array_type()._from_sequence(rk) + elif is_extension_array_dtype(rk.dtype): + ct = find_common_type([lk.dtype, rk.dtype]) + lk = ct.construct_array_type()._from_sequence(lk) + # check whether ints and floats if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 50a534ad36bcc..02d7e2059e8e1 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2924,3 +2924,26 @@ def test_merge_combinations( expected = expected.reset_index(drop=True) tm.assert_frame_equal(result, expected) + + +def test_merge_ea_int_and_float_numpy(): + # GH#46178 + df1 = DataFrame([1.0, np.nan], dtype=pd.Int64Dtype()) + df2 = DataFrame([1.5]) + expected = DataFrame(columns=[0], dtype="Int64") + + with tm.assert_produces_warning(UserWarning, match="You are merging"): + result = df1.merge(df2) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(UserWarning, match="You are merging"): + result = df2.merge(df1) + tm.assert_frame_equal(result, expected.astype("float64")) + + df2 = DataFrame([1.0]) + expected = DataFrame([1], columns=[0], dtype="Int64") + result = df1.merge(df2) + tm.assert_frame_equal(result, expected) + + result = df2.merge(df1) + tm.assert_frame_equal(result, expected.astype("float64")) From cf773a8fc3dc17a045b8e08e004175dafe79254b Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 26 Aug 2023 00:51:23 +0200 Subject: [PATCH 2/2] Fix up mypy and add check --- pandas/core/reshape/merge.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a936903ff3ffa..13bc1008698b2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1390,10 +1390,16 @@ def _maybe_coerce_merge_keys(self) -> None: rk.dtype ): ct = find_common_type([lk.dtype, rk.dtype]) - rk = ct.construct_array_type()._from_sequence(rk) + if is_extension_array_dtype(ct): + rk = ct.construct_array_type()._from_sequence(rk) # type: ignore[union-attr] # noqa: E501 + else: + rk = rk.astype(ct) # type: ignore[arg-type] elif is_extension_array_dtype(rk.dtype): ct = find_common_type([lk.dtype, rk.dtype]) - lk = ct.construct_array_type()._from_sequence(lk) + if is_extension_array_dtype(ct): + lk = ct.construct_array_type()._from_sequence(lk) # type: ignore[union-attr] # noqa: E501 + else: + lk = lk.astype(ct) # type: ignore[arg-type] # check whether ints and floats if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype):