Skip to content

Commit 19f5a8c

Browse files
committed
change after review
1 parent f99a907 commit 19f5a8c

File tree

4 files changed

+21
-14
lines changed

4 files changed

+21
-14
lines changed

pandas/core/reshape/merge.py

+5 -4
Original file line number | Diff line number | Diff line change
@@ -1605,7 +1605,9 @@ def _factorize_keys(lk, rk, sort=True):
16051605

16061606
lk = ensure_int64(lk.codes)
16071607
rk = ensure_int64(rk)
1608-
elif is_extension_array_dtype(lk) and is_extension_array_dtype(rk):
1608+
elif (is_extension_array_dtype(lk) and
1609+
is_extension_array_dtype(rk) and
1610+
lk.dtype == rk.dtype):
16091611
klass = libhashtable.Factorizer
16101612
lk = ensure_object(lk)
16111613
rk = ensure_object(rk)
@@ -1618,9 +1620,8 @@ def _factorize_keys(lk, rk, sort=True):
16181620
elif (issubclass(lk.dtype.type, (np.timedelta64, np.datetime64)) and
16191621
issubclass(rk.dtype.type, (np.timedelta64, np.datetime64))):
16201622
# GH#23917 TODO: Needs tests for non-matching dtypes
1621-
klass = libhashtable.Int64Factorizer
1622-
lk = ensure_int64(com.values_from_object(lk))
1623-
rk = ensure_int64(com.values_from_object(rk))
1623+
lk, _ = lk._values_for_factorize()
1624+
rk, _ = rk._values_for_factorize()
16241625
else:
16251626
klass = libhashtable.Factorizer
16261627
lk = ensure_object(lk)

pandas/tests/extension/base/reshaping.py

+8
Original file line number | Diff line number | Diff line change
@@ -237,3 +237,11 @@ def test_unstack(self, data, index, obj):
237237
result = result.astype(object)
238238

239239
self.assert_frame_equal(result, expected)
240+
241+
def test_merge_on_int_array(self, df_merge_on_int_array):
242+
# GH 23020
243+
result = pd.merge(df_merge_on_int_array, df_merge_on_int_array, on='A')
244+
expected = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
245+
'B_x': 1,
246+
'B_y': 1})
247+
self.assert_frame_equal(result, expected, check_dtype=True)

pandas/tests/extension/conftest.py

+8
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import pandas as pd
2+
import numpy as np
13
import operator
24

35
import pytest
@@ -104,3 +106,9 @@ def data_for_grouping():
104106
def box_in_series(request):
105107
"""Whether to box the data in a Series"""
106108
return request.param
109+
110+
111+
@pytest.fixture
112+
def df_merge_on_int_array():
113+
return pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
114+
'B': 1})

pandas/tests/reshape/merge/test_merge.py

-10
Original file line number | Diff line number | Diff line change
@@ -1326,16 +1326,6 @@ def test_merging_with_bool_or_int_cateorical_column(self, category_column,
13261326
CDT(categories, ordered=ordered))
13271327
assert_frame_equal(expected, result)
13281328

1329-
def test_merge_on_int_array(self):
1330-
# GH 23020
1331-
df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1332-
'B': 1})
1333-
result = pd.merge(df, df, on='A')
1334-
expected = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
1335-
'B_x': 1,
1336-
'B_y': 1})
1337-
assert_frame_equal(result, expected, check_dtype=True)
1338-
13391329

13401330
@pytest.fixture
13411331
def left_df():

0 commit comments

Comments
 (0)