Skip to content

Commit 2557e68

Browse files
phofl authored and mroeschke committed
REGR: Merge raising when left merging on arrow string index (pandas-dev#54895)
1 parent bbede50 commit 2557e68

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

doc/source/whatsnew/v2.1.1.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Fixed regression in :func:`merge` when merging over a PyArrow string index (:issue:`54894`)
1617
- Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtypes`` is a dict for ``engine="python"`` (:issue:`54868`)
1718
- Fixed regression in :meth:`.GroupBy.get_group` raising for ``axis=1`` (:issue:`54858`)
1819
- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)

pandas/core/reshape/merge.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2442,8 +2442,12 @@ def _factorize_keys(
24422442
length = len(dc.dictionary)
24432443

24442444
llab, rlab, count = (
2445-
pc.fill_null(dc.indices[slice(len_lk)], length).to_numpy(),
2446-
pc.fill_null(dc.indices[slice(len_lk, None)], length).to_numpy(),
2445+
pc.fill_null(dc.indices[slice(len_lk)], length)
2446+
.to_numpy()
2447+
.astype(np.intp, copy=False),
2448+
pc.fill_null(dc.indices[slice(len_lk, None)], length)
2449+
.to_numpy()
2450+
.astype(np.intp, copy=False),
24472451
len(dc.dictionary),
24482452
)
24492453
if how == "right":

pandas/tests/reshape/merge/test_merge.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2947,3 +2947,15 @@ def test_merge_ea_int_and_float_numpy():
29472947

29482948
result = df2.merge(df1)
29492949
tm.assert_frame_equal(result, expected.astype("float64"))
2950+
2951+
2952+
def test_merge_arrow_string_index():
2953+
# GH#54894
2954+
pytest.importorskip("pyarrow")
2955+
left = DataFrame({"a": ["a", "b"]}, dtype="string[pyarrow]")
2956+
right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype="string[pyarrow]"))
2957+
result = left.merge(right, left_on="a", right_index=True, how="left")
2958+
expected = DataFrame(
2959+
{"a": Series(["a", "b"], dtype="string[pyarrow]"), "b": [1, np.nan]}
2960+
)
2961+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)