Skip to content

Commit a469624

Browse files
committed
fix bug #GH23020
1 parent 85dc171 commit a469624

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -946,6 +946,7 @@ Reshaping
946946
- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)
947947
- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`)
948948
- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
949+
- Bug in :func:`pandas.merge` when merging on an Integer extension array (:issue:`23020`)
949950

950951
.. _whatsnew_0240.bug_fixes.sparse:
951952

pandas/core/reshape/merge.py

+5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
is_number,
2727
is_numeric_dtype,
2828
is_integer,
29+
is_extension_array_dtype,
2930
is_int_or_datetime_dtype,
3031
is_dtype_equal,
3132
is_bool,
@@ -1551,6 +1552,10 @@ def _factorize_keys(lk, rk, sort=True):
15511552

15521553
lk = ensure_int64(lk.codes)
15531554
rk = ensure_int64(rk)
1555+
elif is_extension_array_dtype(lk) and is_extension_array_dtype(rk):
1556+
klass = libhashtable.Factorizer
1557+
lk = ensure_object(lk)
1558+
rk = ensure_object(rk)
15541559
elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
15551560
klass = libhashtable.Int64Factorizer
15561561
lk = ensure_int64(com.values_from_object(lk))

pandas/tests/reshape/merge/test_merge.py

+10
Original file line numberDiff line numberDiff line change
@@ -1842,6 +1842,16 @@ def test_merging_with_bool_or_int_cateorical_column(self, category_column,
18421842
CDT(categories, ordered=ordered))
18431843
assert_frame_equal(expected, result)
18441844

1845+
def test_merge_on_int_array(self):
1846+
# GH 23020
1847+
df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1848+
'B': 1})
1849+
result = pd.merge(df, df, on='A')
1850+
expected = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1851+
'B_x': 1,
1852+
'B_y': 1})
1853+
assert_frame_equal(result, expected, check_dtype=True)
1854+
18451855

18461856
@pytest.fixture
18471857
def left_df():

0 commit comments

Comments
 (0)