Skip to content

Commit b28f352

Browse files
committed
Fallback for older pyarrow
1 parent c20e800 commit b28f352

File tree

6 files changed

+25
-42
lines changed

6 files changed

+25
-42
lines changed

pandas/core/arrays/_arrow_string_mixins.py

+20
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
from pandas.compat import (
1313
pa_version_under10p1,
14+
pa_version_under11p0,
1415
pa_version_under13p0,
1516
pa_version_under17p0,
1617
)
@@ -103,6 +104,25 @@ def _str_get(self, i: int) -> Self:
103104
result = pc.if_else(not_out_of_bounds, selected, null_value)
104105
return type(self)(result)
105106

107+
def _str_slice(
    self, start: int | None = None, stop: int | None = None, step: int | None = None
) -> Self:
    """
    Slice every string element as ``val[start:stop:step]``.

    Parameters
    ----------
    start : int or None, default None
        Start position of the slice. When omitted it becomes ``0`` for a
        forward slice and ``-1`` for a negative ``step``.
    stop : int or None, default None
        Stop position of the slice; ``None`` is forwarded unchanged.
    step : int or None, default None
        Step size of the slice. When omitted it becomes ``1``.

    Returns
    -------
    Self
        A new array of the same class as ``self`` holding the sliced strings.
    """
    if pa_version_under11p0:
        # GH#59724: on older pyarrow, bypass the compute kernel and slice
        # each value with plain Python slicing instead.
        # NOTE(review): assumes _apply_elementwise yields one list per chunk
        # and leaves missing values untouched -- confirm against its
        # implementation.
        result = self._apply_elementwise(lambda val: val[start:stop:step])
        # The result is string data, so wrap it the same way as the kernel
        # path below rather than routing it through the integer-result
        # conversion hook. Passing the source type explicitly keeps the
        # string type even when every element is null or the array is empty,
        # where pyarrow has nothing to infer a type from.
        return type(self)(pa.chunked_array(result, type=self._pa_array.type))
    if start is None:
        if step is not None and step < 0:
            # GH#59710: a reversed slice has to begin at the last character.
            start = -1
        else:
            start = 0
    if step is None:
        step = 1
    return type(self)(
        pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
    )
125+
106126
def _str_slice_replace(
107127
self, start: int | None = None, stop: int | None = None, repl: str | None = None
108128
) -> Self:

pandas/core/arrays/arrow/array.py

-15
Original file line number | Diff line number | Diff line change
@@ -2393,21 +2393,6 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self:
23932393
result = self._apply_elementwise(predicate)
23942394
return type(self)(pa.chunked_array(result))
23952395

2396-
def _str_slice(
2397-
self, start: int | None = None, stop: int | None = None, step: int | None = None
2398-
) -> Self:
2399-
if start is None:
2400-
if step is not None and step < 0:
2401-
# GH#59710
2402-
start = -1
2403-
else:
2404-
start = 0
2405-
if step is None:
2406-
step = 1
2407-
return type(self)(
2408-
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
2409-
)
2410-
24112396
def _str_len(self) -> Self:
24122397
return type(self)(pc.utf8_length(self._pa_array))
24132398

pandas/core/arrays/string_arrow.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -293,7 +293,7 @@ def astype(self, dtype, copy: bool = True):
293293
_str_startswith = ArrowStringArrayMixin._str_startswith
294294
_str_endswith = ArrowStringArrayMixin._str_endswith
295295
_str_pad = ArrowStringArrayMixin._str_pad
296-
_str_slice = ArrowExtensionArray._str_slice
296+
_str_slice = ArrowStringArrayMixin._str_slice
297297

298298
def _str_contains(
299299
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True

pandas/tests/extension/test_arrow.py

+1-7
Original file line number | Diff line number | Diff line change
@@ -2036,13 +2036,7 @@ def test_str_join_string_type():
20362036
[None, 2, None, ["ab", None]],
20372037
[None, 2, 1, ["ab", None]],
20382038
[1, 3, 1, ["bc", None]],
2039-
pytest.param(
2040-
None,
2041-
None,
2042-
-1,
2043-
["dcba", None],
2044-
marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"),
2045-
),
2039+
[None, None, -1, ["dcba", None]],
20462040
],
20472041
)
20482042
def test_str_slice(start, stop, step, exp):

pandas/tests/strings/test_string_array.py

+1-6
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
import pytest
33

44
from pandas._libs import lib
5-
from pandas.compat import pa_version_under11p0
65

76
from pandas import (
87
NA,
@@ -14,7 +13,7 @@
1413

1514

1615
@pytest.mark.filterwarnings("ignore:Falling back")
17-
def test_string_array(nullable_string_dtype, any_string_method, request):
16+
def test_string_array(nullable_string_dtype, any_string_method):
1817
method_name, args, kwargs = any_string_method
1918

2019
data = ["a", "bb", np.nan, "ccc"]
@@ -26,10 +25,6 @@ def test_string_array(nullable_string_dtype, any_string_method, request):
2625
getattr(b.str, method_name)(*args, **kwargs)
2726
return
2827

29-
if b.dtype.storage == "pyarrow" and pa_version_under11p0 and method_name == "slice":
30-
mark = pytest.mark.xfail(reason="Negative buffer resize")
31-
request.applymarker(mark)
32-
3328
expected = getattr(a.str, method_name)(*args, **kwargs)
3429
result = getattr(b.str, method_name)(*args, **kwargs)
3530

pandas/tests/strings/test_strings.py

+2-13
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas.compat import pa_version_under11p0
10-
119
from pandas import (
1210
DataFrame,
1311
Index,
@@ -395,22 +393,13 @@ def test_pipe_failures(any_string_dtype):
395393
[
396394
(2, 5, None, ["foo", "bar", np.nan, "baz"]),
397395
(0, 3, -1, ["", "", np.nan, ""]),
398-
(
399-
None,
400-
None,
401-
-1,
402-
["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"],
403-
),
396+
(None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]),
404397
(3, 10, 2, ["oto", "ato", np.nan, "aqx"]),
405398
(3, 0, -1, ["ofa", "aba", np.nan, "aba"]),
406399
],
407400
)
408-
def test_slice(start, stop, step, expected, any_string_dtype, request):
401+
def test_slice(start, stop, step, expected, any_string_dtype):
409402
ser = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"], dtype=any_string_dtype)
410-
if any_string_dtype == "string[pyarrow]" and pa_version_under11p0:
411-
mark = pytest.mark.xfail(reason="Empty result")
412-
request.applymarker(mark)
413-
414403
result = ser.str.slice(start, stop, step)
415404
expected = Series(expected, dtype=any_string_dtype)
416405
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)