Fallback for older pyarrow

jbrockmendel · jbrockmendel · commit b28f3527f099 · 2024-09-09T08:51:51.000-07:00
diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
@@ -11,6 +11,7 @@
 
 from pandas.compat import (
     pa_version_under10p1,
+    pa_version_under11p0,
     pa_version_under13p0,
     pa_version_under17p0,
 )
@@ -103,6 +104,33 @@ def _str_get(self, i: int) -> Self:
         result = pc.if_else(not_out_of_bounds, selected, null_value)
         return type(self)(result)
 
+    def _str_slice(
+        self, start: int | None = None, stop: int | None = None, step: int | None = None
+    ) -> Self:
+        """
+        Slice each string element as ``val[start:stop:step]``.
+
+        When ``pa_version_under11p0`` is set the slice is applied element by
+        element in Python (GH#59724); otherwise ``pc.utf8_slice_codeunits`` is used.
+        """
+        if pa_version_under11p0:
+            # GH#59724 fall back to Python slicing on older pyarrow
+            res_list = self._apply_elementwise(lambda val: val[start:stop:step])
+            # The result holds strings: keep the input's pyarrow type explicitly
+            # (nothing to infer from all-null chunks) and wrap it like the branch below.
+            return type(self)(pa.chunked_array(res_list, type=self._pa_array.type))
+        if start is None:
+            if step is not None and step < 0:
+                # GH#59710 negative step without a start: begin at the last element
+                start = -1
+            else:
+                start = 0
+        if step is None:
+            step = 1
+        return type(self)(
+            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
+        )
+
     def _str_slice_replace(
         self, start: int | None = None, stop: int | None = None, repl: str | None = None
     ) -> Self:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -2393,21 +2393,6 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self:
         result = self._apply_elementwise(predicate)
         return type(self)(pa.chunked_array(result))
 
-    def _str_slice(
-        self, start: int | None = None, stop: int | None = None, step: int | None = None
-    ) -> Self:
-        if start is None:
-            if step is not None and step < 0:
-                # GH#59710
-                start = -1
-            else:
-                start = 0
-        if step is None:
-            step = 1
-        return type(self)(
-            pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
-        )
-
     def _str_len(self) -> Self:
         return type(self)(pc.utf8_length(self._pa_array))
 
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -293,7 +293,7 @@ def astype(self, dtype, copy: bool = True):
     _str_startswith = ArrowStringArrayMixin._str_startswith
     _str_endswith = ArrowStringArrayMixin._str_endswith
     _str_pad = ArrowStringArrayMixin._str_pad
-    _str_slice = ArrowExtensionArray._str_slice
+    _str_slice = ArrowStringArrayMixin._str_slice
 
     def _str_contains(
         self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -2036,13 +2036,7 @@ def test_str_join_string_type():
         [None, 2, None, ["ab", None]],
         [None, 2, 1, ["ab", None]],
         [1, 3, 1, ["bc", None]],
-        pytest.param(
-            None,
-            None,
-            -1,
-            ["dcba", None],
-            marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"),
-        ),
+        [None, None, -1, ["dcba", None]],
     ],
 )
 def test_str_slice(start, stop, step, exp):
diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py
@@ -2,7 +2,6 @@
 import pytest
 
 from pandas._libs import lib
-from pandas.compat import pa_version_under11p0
 
 from pandas import (
     NA,
@@ -14,7 +13,7 @@
 
 
 @pytest.mark.filterwarnings("ignore:Falling back")
-def test_string_array(nullable_string_dtype, any_string_method, request):
+def test_string_array(nullable_string_dtype, any_string_method):
     method_name, args, kwargs = any_string_method
 
     data = ["a", "bb", np.nan, "ccc"]
@@ -26,10 +25,6 @@ def test_string_array(nullable_string_dtype, any_string_method, request):
             getattr(b.str, method_name)(*args, **kwargs)
         return
 
-    if b.dtype.storage == "pyarrow" and pa_version_under11p0 and method_name == "slice":
-        mark = pytest.mark.xfail(reason="Negative buffer resize")
-        request.applymarker(mark)
-
     expected = getattr(a.str, method_name)(*args, **kwargs)
     result = getattr(b.str, method_name)(*args, **kwargs)
 
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
@@ -6,8 +6,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under11p0
-
 from pandas import (
     DataFrame,
     Index,
@@ -395,22 +393,13 @@ def test_pipe_failures(any_string_dtype):
     [
         (2, 5, None, ["foo", "bar", np.nan, "baz"]),
         (0, 3, -1, ["", "", np.nan, ""]),
-        (
-            None,
-            None,
-            -1,
-            ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"],
-        ),
+        (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]),
         (3, 10, 2, ["oto", "ato", np.nan, "aqx"]),
         (3, 0, -1, ["ofa", "aba", np.nan, "aba"]),
     ],
 )
-def test_slice(start, stop, step, expected, any_string_dtype, request):
+def test_slice(start, stop, step, expected, any_string_dtype):
     ser = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"], dtype=any_string_dtype)
-    if any_string_dtype == "string[pyarrow]" and pa_version_under11p0:
-        mark = pytest.mark.xfail(reason="Empty result")
-        request.applymarker(mark)
-
     result = ser.str.slice(start, stop, step)
     expected = Series(expected, dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)