Skip to content

Commit fbd021d

Browse files
committed
BUG: Series.str.slice with negative step
1 parent 4f1052e commit fbd021d

File tree

4 files changed

+8
-14
lines changed

4 files changed

+8
-14
lines changed

doc/source/whatsnew/v2.3.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,9 @@ Conversion
103103
Strings
104104
^^^^^^^
105105
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106+
- Bug in :meth:`Series.str.slice` with negative ``step`` for :class:`ArrowDtype` giving incorrect results (:issue:`59710`)
106107
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
107-
108+
-
108109

109110
Interval
110111
^^^^^^^^

pandas/core/arrays/arrow/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -2434,6 +2434,10 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self:
24342434
def _str_slice(
24352435
self, start: int | None = None, stop: int | None = None, step: int | None = None
24362436
) -> Self:
2437+
if step is not None and step < 0:
2438+
# GH#59710
2439+
result = self._apply_elementwise(lambda x: x[start:stop:step])
2440+
return type(self)(pa.chunked_array(result))
24372441
if start is None:
24382442
start = 0
24392443
if step is None:

pandas/core/arrays/string_arrow.py

+1-13
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ def astype(self, dtype, copy: bool = True):
295295
_str_startswith = ArrowStringArrayMixin._str_startswith
296296
_str_endswith = ArrowStringArrayMixin._str_endswith
297297
_str_pad = ArrowStringArrayMixin._str_pad
298+
_str_slice = ArrowExtensionArray._str_slice
298299

299300
def _str_contains(
300301
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -357,19 +358,6 @@ def _str_fullmatch(
357358
pat = f"{pat}$"
358359
return self._str_match(pat, case, flags, na)
359360

360-
def _str_slice(
361-
self, start: int | None = None, stop: int | None = None, step: int | None = None
362-
) -> Self:
363-
if stop is None:
364-
return super()._str_slice(start, stop, step)
365-
if start is None:
366-
start = 0
367-
if step is None:
368-
step = 1
369-
return type(self)(
370-
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
371-
)
372-
373361
def _str_len(self):
374362
result = pc.utf8_length(self._pa_array)
375363
return self._convert_int_result(result)

pandas/tests/extension/test_arrow.py

+1
Original file line numberDiff line numberDiff line change
@@ -2045,6 +2045,7 @@ def test_str_join_string_type():
20452045
[None, 2, None, ["ab", None]],
20462046
[None, 2, 1, ["ab", None]],
20472047
[1, 3, 1, ["bc", None]],
2048+
[None, None, -1, ["dcba", None]],
20482049
],
20492050
)
20502051
def test_str_slice(start, stop, step, exp):

0 commit comments

Comments
 (0)