Skip to content

Commit f027cb8

Browse files
committed
fix bug #GH23020
1 parent 8586644 commit f027cb8

File tree

3 files changed

+53
-0
lines changed

3 files changed

+53
-0
lines changed

doc/source/whatsnew/v0.24.0.txt

+6
Original file line numberDiff line numberDiff line change
@@ -1332,6 +1332,12 @@ Sparse
13321332
- Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. The performance of this has also been improved (:issue:`22835`)
13331333
- Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`)
13341334

1335+
IntegerArray
1336+
^^^^^^^^^^^^
1337+
1338+
- Bug in :func:`pandas.merge` when merging on an Integer extension array (:issue:`23020`)
1339+
1340+
13351341
Build Changes
13361342
^^^^^^^^^^^^^
13371343

pandas/core/reshape/merge.py

+37
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,43 @@
88

99
import numpy as np
1010

11+
<<<<<<< HEAD
1112
from pandas._libs import hashtable as libhashtable, join as libjoin, lib
1213
import pandas.compat as compat
1314
from pandas.compat import filter, lzip, map, range, zip
1415
from pandas.errors import MergeError
16+
=======
17+
from pandas import (Categorical, DataFrame,
18+
Index, MultiIndex, Timedelta, Series)
19+
from pandas.core.arrays.categorical import _recode_for_categories
20+
from pandas.core.frame import _merge_doc
21+
from pandas.core.dtypes.common import (
22+
is_datetime64tz_dtype,
23+
is_datetime64_dtype,
24+
needs_i8_conversion,
25+
is_int64_dtype,
26+
is_array_like,
27+
is_categorical_dtype,
28+
is_integer_dtype,
29+
is_float_dtype,
30+
is_number,
31+
is_numeric_dtype,
32+
is_integer,
33+
is_extension_array_dtype,
34+
is_int_or_datetime_dtype,
35+
is_dtype_equal,
36+
is_bool,
37+
is_bool_dtype,
38+
is_list_like,
39+
is_datetimelike,
40+
ensure_int64,
41+
ensure_float64,
42+
ensure_object,
43+
_get_dtype)
44+
from pandas.core.dtypes.missing import na_value_for_dtype
45+
from pandas.core.internals import (items_overlap_with_suffix,
46+
concatenate_block_managers)
47+
>>>>>>> fix bug #GH23020
1548
from pandas.util._decorators import Appender, Substitution
1649

1750
from pandas.core.dtypes.common import (
@@ -1518,6 +1551,10 @@ def _factorize_keys(lk, rk, sort=True):
15181551

15191552
lk = ensure_int64(lk.codes)
15201553
rk = ensure_int64(rk)
1554+
elif is_extension_array_dtype(lk) and is_extension_array_dtype(rk):
1555+
klass = libhashtable.Factorizer
1556+
lk = ensure_object(lk)
1557+
rk = ensure_object(rk)
15211558
elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
15221559
klass = libhashtable.Int64Factorizer
15231560
lk = ensure_int64(com.values_from_object(lk))

pandas/tests/reshape/merge/test_merge.py

+10
Original file line numberDiff line numberDiff line change
@@ -1842,6 +1842,16 @@ def test_merging_with_bool_or_int_cateorical_column(self, category_column,
18421842
CDT(categories, ordered=ordered))
18431843
assert_frame_equal(expected, result)
18441844

1845+
def test_merge_on_int_array(self):
1846+
# GH 23020
1847+
df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1848+
'B': 1})
1849+
result = pd.merge(df, df, on='A')
1850+
expected = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1851+
'B_x': 1,
1852+
'B_y': 1})
1853+
assert_frame_equal(result, expected, check_dtype=True)
1854+
18451855

18461856
@pytest.fixture
18471857
def left_df():

0 commit comments

Comments
 (0)