Skip to content

Commit 2b4d955

Browse files
String dtype: fix alignment sorting in case of python storage (pandas-dev#59448)
* String dtype: fix alignment sorting in case of python storage
* add test
1 parent 2cf5002 commit 2b4d955

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

pandas/core/indexes/base.py

+4 -1
Original file line numberDiff line numberDiff line change
@@ -5072,7 +5072,10 @@ def _can_use_libjoin(self) -> bool:
50725072
return (
50735073
isinstance(self.dtype, np.dtype)
50745074
or isinstance(self._values, (ArrowExtensionArray, BaseMaskedArray))
5075-
or self.dtype == "string[python]"
5075+
or (
5076+
isinstance(self.dtype, StringDtype)
5077+
and self.dtype.storage == "python"
5078+
)
50765079
)
50775080
# Exclude index types where the conversion to numpy converts to object dtype,
50785081
# which negates the performance benefit of libjoin

pandas/tests/series/methods/test_align.py

+13
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,19 @@ def test_align_periodindex(join_type):
211211
ts.align(ts[::2], join=join_type)
212212

213213

214+
def test_align_stringindex(any_string_dtype):
215+
left = Series(range(3), index=pd.Index(["a", "b", "d"], dtype=any_string_dtype))
216+
right = Series(range(3), index=pd.Index(["a", "b", "c"], dtype=any_string_dtype))
217+
result_left, result_right = left.align(right)
218+
219+
expected_idx = pd.Index(["a", "b", "c", "d"], dtype=any_string_dtype)
220+
expected_left = Series([0, 1, np.nan, 2], index=expected_idx)
221+
expected_right = Series([0, 1, 2, np.nan], index=expected_idx)
222+
223+
tm.assert_series_equal(result_left, expected_left)
224+
tm.assert_series_equal(result_right, expected_right)
225+
226+
214227
def test_align_left_fewer_levels():
215228
# GH#45224
216229
left = Series([2], index=pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"]))

0 commit comments

Comments
 (0)