Skip to content

Commit f99a907

Browse files
committed
Fix bug GH#23020: pandas.merge failing when merging on an Integer extension array
1 parent 669cb27 commit f99a907

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

doc/source/whatsnew/v0.24.0.rst

+6
Original file line numberDiff line numberDiff line change
@@ -1564,6 +1564,12 @@ Sparse
15641564
- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`)
15651565
- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`)
15661566

1567+
IntegerArray
1568+
^^^^^^^^^^^^
1569+
1570+
- Bug in :func:`pandas.merge` when the merge key is a column backed by an Integer extension array (e.g. ``Int64`` dtype) (:issue:`23020`)
1571+
1572+
15671573
Build Changes
15681574
^^^^^^^^^^^^^
15691575

pandas/core/reshape/merge.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
is_bool_dtype, is_categorical_dtype, is_datetime64_dtype,
2020
is_datetime64tz_dtype, is_datetimelike, is_dtype_equal, is_float_dtype,
2121
is_int64_dtype, is_integer, is_integer_dtype, is_list_like, is_number,
22-
is_numeric_dtype, needs_i8_conversion)
22+
is_numeric_dtype, needs_i8_conversion, is_extension_array_dtype)
2323
from pandas.core.dtypes.missing import isnull, na_value_for_dtype
2424

2525
from pandas import Categorical, DataFrame, Index, MultiIndex, Series, Timedelta
@@ -1605,6 +1605,10 @@ def _factorize_keys(lk, rk, sort=True):
16051605

16061606
lk = ensure_int64(lk.codes)
16071607
rk = ensure_int64(rk)
1608+
elif is_extension_array_dtype(lk) and is_extension_array_dtype(rk):
1609+
klass = libhashtable.Factorizer
1610+
lk = ensure_object(lk)
1611+
rk = ensure_object(rk)
16081612
elif is_integer_dtype(lk) and is_integer_dtype(rk):
16091613
# GH#23917 TODO: needs tests for case where lk is integer-dtype
16101614
# and rk is datetime-dtype

pandas/tests/reshape/merge/test_merge.py

+10
Original file line numberDiff line numberDiff line change
@@ -1326,6 +1326,16 @@ def test_merging_with_bool_or_int_cateorical_column(self, category_column,
13261326
CDT(categories, ordered=ordered))
13271327
assert_frame_equal(expected, result)
13281328

1329+
def test_merge_on_int_array(self):
1330+
# GH 23020
1331+
df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1332+
'B': 1})
1333+
result = pd.merge(df, df, on='A')
1334+
expected = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1335+
'B_x': 1,
1336+
'B_y': 1})
1337+
assert_frame_equal(result, expected, check_dtype=True)
1338+
13291339

13301340
@pytest.fixture
13311341
def left_df():

0 commit comments

Comments
 (0)