Skip to content

Commit b736912

Browse files
Backport PR #54974 on branch 2.1.x (Include pyarrow_numpy string in efficient merge implementation) (#55021)
Backport PR #54974: Include pyarrow_numpy string in efficient merge implementation

Co-authored-by: Patrick Hoefler <[email protected]>
1 parent eab54ee commit b736912

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

pandas/core/reshape/merge.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -2417,7 +2417,8 @@ def _factorize_keys(
24172417

24182418
elif isinstance(lk, ExtensionArray) and lk.dtype == rk.dtype:
24192419
if (isinstance(lk.dtype, ArrowDtype) and is_string_dtype(lk.dtype)) or (
2420-
isinstance(lk.dtype, StringDtype) and lk.dtype.storage == "pyarrow"
2420+
isinstance(lk.dtype, StringDtype)
2421+
and lk.dtype.storage in ["pyarrow", "pyarrow_numpy"]
24212422
):
24222423
import pyarrow as pa
24232424
import pyarrow.compute as pc

pandas/tests/reshape/merge/test_merge.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -2872,13 +2872,13 @@ def test_merge_ea_int_and_float_numpy():
28722872
tm.assert_frame_equal(result, expected.astype("float64"))
28732873

28742874

2875-
def test_merge_arrow_string_index():
2875+
def test_merge_arrow_string_index(any_string_dtype):
28762876
# GH#54894
28772877
pytest.importorskip("pyarrow")
2878-
left = DataFrame({"a": ["a", "b"]}, dtype="string[pyarrow]")
2879-
right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype="string[pyarrow]"))
2878+
left = DataFrame({"a": ["a", "b"]}, dtype=any_string_dtype)
2879+
right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype=any_string_dtype))
28802880
result = left.merge(right, left_on="a", right_index=True, how="left")
28812881
expected = DataFrame(
2882-
{"a": Series(["a", "b"], dtype="string[pyarrow]"), "b": [1, np.nan]}
2882+
{"a": Series(["a", "b"], dtype=any_string_dtype), "b": [1, np.nan]}
28832883
)
28842884
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)