Skip to content

Commit f7eb2cc

Browse files
authored
Backport PR #54755 on branch 2.1.x (BUG: merge raising for ea int and numpy float) (#54779)
BUG: merge raising for ea int and numpy float (#54755) * BUG: merge raising for ea int and numpy float * Fix up mypy and add check (cherry picked from commit 9939c32)
1 parent d42fbed commit f7eb2cc

File tree

3 files changed

+40
-0
lines changed

3 files changed

+40
-0
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,7 @@ Reshaping
816816
- Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`)
817817
- Bug in :func:`merge_asof` raising ``ValueError`` for data backed by read-only ndarrays (:issue:`53513`)
818818
- Bug in :func:`merge_asof` with ``left_index=True`` or ``right_index=True`` with mismatched index dtypes giving incorrect results in some cases instead of raising ``MergeError`` (:issue:`53870`)
819+
- Bug in :func:`merge` when merging on integer ``ExtensionDtype`` and float NumPy dtype raising ``TypeError`` (:issue:`46178`)
819820
- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when a dict-like argument is passed in (:issue:`51099`)
820821
- Bug in :meth:`DataFrame.combine_first` ignoring other's columns if ``other`` is empty (:issue:`53792`)
821822
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)

pandas/core/reshape/merge.py

+16
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
ensure_object,
5454
is_bool,
5555
is_bool_dtype,
56+
is_extension_array_dtype,
5657
is_float_dtype,
5758
is_integer,
5859
is_integer_dtype,
@@ -1385,6 +1386,21 @@ def _maybe_coerce_merge_keys(self) -> None:
13851386
if lk.dtype.kind == rk.dtype.kind:
13861387
continue
13871388

1389+
if is_extension_array_dtype(lk.dtype) and not is_extension_array_dtype(
1390+
rk.dtype
1391+
):
1392+
ct = find_common_type([lk.dtype, rk.dtype])
1393+
if is_extension_array_dtype(ct):
1394+
rk = ct.construct_array_type()._from_sequence(rk) # type: ignore[union-attr] # noqa: E501
1395+
else:
1396+
rk = rk.astype(ct) # type: ignore[arg-type]
1397+
elif is_extension_array_dtype(rk.dtype):
1398+
ct = find_common_type([lk.dtype, rk.dtype])
1399+
if is_extension_array_dtype(ct):
1400+
lk = ct.construct_array_type()._from_sequence(lk) # type: ignore[union-attr] # noqa: E501
1401+
else:
1402+
lk = lk.astype(ct) # type: ignore[arg-type]
1403+
13881404
# check whether ints and floats
13891405
if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype):
13901406
# GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int

pandas/tests/reshape/merge/test_merge.py

+23
Original file line numberDiff line numberDiff line change
@@ -2847,3 +2847,26 @@ def test_merge_multiindex_single_level():
28472847

28482848
result = df.merge(df2, left_on=["col"], right_index=True, how="left")
28492849
tm.assert_frame_equal(result, expected)
2850+
2851+
2852+
def test_merge_ea_int_and_float_numpy():
2853+
# GH#46178
2854+
df1 = DataFrame([1.0, np.nan], dtype=pd.Int64Dtype())
2855+
df2 = DataFrame([1.5])
2856+
expected = DataFrame(columns=[0], dtype="Int64")
2857+
2858+
with tm.assert_produces_warning(UserWarning, match="You are merging"):
2859+
result = df1.merge(df2)
2860+
tm.assert_frame_equal(result, expected)
2861+
2862+
with tm.assert_produces_warning(UserWarning, match="You are merging"):
2863+
result = df2.merge(df1)
2864+
tm.assert_frame_equal(result, expected.astype("float64"))
2865+
2866+
df2 = DataFrame([1.0])
2867+
expected = DataFrame([1], columns=[0], dtype="Int64")
2868+
result = df1.merge(df2)
2869+
tm.assert_frame_equal(result, expected)
2870+
2871+
result = df2.merge(df1)
2872+
tm.assert_frame_equal(result, expected.astype("float64"))

0 commit comments

Comments
 (0)