From 00e59bffd2f3237541391d06604a5f2972aff919 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 5 Sep 2024 14:15:45 -0700 Subject: [PATCH 01/15] BUG: Series.str.slice with negative step --- doc/source/whatsnew/v2.3.0.rst | 3 ++- pandas/core/arrays/arrow/array.py | 4 ++++ pandas/core/arrays/string_arrow.py | 14 +------------- pandas/tests/extension/test_arrow.py | 1 + 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 03355f655eb28..da55e827246f1 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -103,8 +103,9 @@ Conversion Strings ^^^^^^^ - Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`) +- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` giving incorrect results (:issue:`59710`) - Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`) - +- Interval ^^^^^^^^ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 15f9ba611a642..36be35596ff8d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2396,6 +2396,10 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self: def _str_slice( self, start: int | None = None, stop: int | None = None, step: int | None = None ) -> Self: + if step is not None and step < 0: + # GH#59710 + result = self._apply_elementwise(lambda x: x[start:stop:step]) + return type(self)(pa.chunked_array(result)) if start is None: start = 0 if step is None: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 1e5adf106752f..2e0c9b081f8df 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -293,6 +293,7 @@ def astype(self, dtype, copy: bool = True): _str_startswith = ArrowStringArrayMixin._str_startswith _str_endswith = ArrowStringArrayMixin._str_endswith _str_pad = ArrowStringArrayMixin._str_pad + _str_slice = ArrowExtensionArray._str_slice def _str_contains( self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True @@ -351,19 +352,6 @@ def _str_fullmatch( pat = f"{pat}$" return self._str_match(pat, case, flags, na) - def _str_slice( - self, start: int | None = None, stop: int | None = None, step: int | None = None - ) -> Self: - if stop is None: - return super()._str_slice(start, stop, step) - if start is None: - start = 0 - if step is None: - step = 1 - return type(self)( - pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step) - ) - def _str_len(self): result = pc.utf8_length(self._pa_array) return self._convert_int_result(result) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fc4f14882b9d7..97283adf98e9d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2036,6 +2036,7 @@ def test_str_join_string_type(): [None, 2, None, ["ab", None]], [None, 2, 1, ["ab", None]], [1, 3, 1, ["bc", None]], + [None, None, -1, ["dcba", None]], ], ) def test_str_slice(start, stop, step, exp): From 7c055a57f151d4d6a6fead7bb613a9fd6ba6712c Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 5 Sep 2024 18:10:32 -0700 Subject: [PATCH 02/15] xfail on min version --- pandas/tests/strings/test_string_array.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index 0b3f368afea5e..cd32c1bcb021b 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -2,6 +2,7 @@ import pytest from pandas._libs import lib +from pandas.compat import pa_version_under10p1 from pandas import ( NA, @@ -13,7 +14,7 @@ @pytest.mark.filterwarnings("ignore:Falling back") -def test_string_array(nullable_string_dtype, any_string_method): +def test_string_array(nullable_string_dtype, any_string_method, request): method_name, args, kwargs = any_string_method data = ["a", "bb", np.nan, "ccc"] @@ -25,6 +26,14 @@ def test_string_array(nullable_string_dtype, any_string_method): getattr(b.str, method_name)(*args, **kwargs) return + if ( + method_name == "slice" + and nullable_string_dtype == "string[pyarrow]" + and pa_version_under10p1 + ): + mark = pytest.mark.xfail(reason="Raises with Negative buffer resize: -16") + request.applymarker(mark) + expected = getattr(a.str, method_name)(*args, **kwargs) result = getattr(b.str, method_name)(*args, **kwargs) From 2bc134771b7dcccac838f74293b423310fbd44b7 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Sep 2024 09:02:02 -0700 Subject: [PATCH 03/15] bump version --- pandas/tests/strings/test_string_array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index cd32c1bcb021b..2d8dbd413b3aa 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -2,7 +2,7 @@ import pytest from pandas._libs import lib -from pandas.compat import pa_version_under10p1 +from pandas.compat import pa_version_under11p0 from pandas import ( NA, @@ -29,7 +29,7 @@ def test_string_array(nullable_string_dtype, any_string_method, request): if ( method_name == "slice" and nullable_string_dtype == "string[pyarrow]" - and pa_version_under10p1 + and pa_version_under11p0 ): mark = pytest.mark.xfail(reason="Raises with Negative buffer resize: -16") request.applymarker(mark) From 5fded806721d682c85661e26805a5f2a47789f8a Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Sep 2024 09:05:27 -0700 Subject: [PATCH 04/15] set start=-1 --- pandas/core/arrays/arrow/array.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 36be35596ff8d..526342e084f70 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2396,12 +2396,12 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self: def _str_slice( self, start: int | None = None, stop: int | None = None, step: int | None = None ) -> Self: - if step is not None and step < 0: - # GH#59710 - result = self._apply_elementwise(lambda x: x[start:stop:step]) - return type(self)(pa.chunked_array(result)) if start is None: - start = 0 + if step is not None and step < 0: + # GH#59710 + start = -1 + else: + start = 0 if step is None: step = 1 return type(self)( From 88208466ce8bc57ea26d30bbc776033ec83bb946 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Sep 2024 13:52:38 -0700 Subject: [PATCH 05/15] xfail on min pyarrow --- pandas/tests/extension/test_arrow.py | 5 ++++- pandas/tests/strings/test_string_array.py | 11 +---------- pandas/tests/strings/test_strings.py | 7 ++++++- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 97283adf98e9d..d86460941287c 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2036,7 +2036,10 @@ def test_str_join_string_type(): [None, 2, None, ["ab", None]], [None, 2, 1, ["ab", None]], [1, 3, 1, ["bc", None]], - [None, None, -1, ["dcba", None]], + pytest.param( + [None, None, -1, ["dcba", None]], + marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"), + ), ], ) def test_str_slice(start, stop, step, exp): diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index 2d8dbd413b3aa..0b3f368afea5e 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -2,7 +2,6 @@ import pytest from pandas._libs import lib -from pandas.compat import pa_version_under11p0 from pandas import ( NA, @@ -14,7 +13,7 @@ @pytest.mark.filterwarnings("ignore:Falling back") -def test_string_array(nullable_string_dtype, any_string_method, request): +def test_string_array(nullable_string_dtype, any_string_method): method_name, args, kwargs = any_string_method data = ["a", "bb", np.nan, "ccc"] @@ -26,14 +25,6 @@ def test_string_array(nullable_string_dtype, any_string_method, request): getattr(b.str, method_name)(*args, **kwargs) return - if ( - method_name == "slice" - and nullable_string_dtype == "string[pyarrow]" - and pa_version_under11p0 - ): - mark = pytest.mark.xfail(reason="Raises with Negative buffer resize: -16") - request.applymarker(mark) - expected = getattr(a.str, method_name)(*args, **kwargs) result = getattr(b.str, method_name)(*args, **kwargs) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 1ce46497c3c22..872e913bcc4c1 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -6,6 +6,8 @@ import numpy as np import pytest +from pandas.compat import pa_version_under11p0 + from pandas import ( DataFrame, Index, @@ -393,7 +395,10 @@ def test_pipe_failures(any_string_dtype): [ (2, 5, None, ["foo", "bar", np.nan, "baz"]), (0, 3, -1, ["", "", np.nan, ""]), - (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), + pytest.param( + (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), + marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"), + ), (3, 10, 2, ["oto", "ato", np.nan, "aqx"]), (3, 0, -1, ["ofa", "aba", np.nan, "aba"]), ], From bbf49648f157ebf6a60edde211ae4c0866c3c4d6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Sep 2024 15:53:00 -0700 Subject: [PATCH 06/15] fix pytest.param usage --- pandas/tests/extension/test_arrow.py | 5 ++++- pandas/tests/strings/test_strings.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index d86460941287c..47a671c481da5 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2037,7 +2037,10 @@ def test_str_join_string_type(): [None, 2, 1, ["ab", None]], [1, 3, 1, ["bc", None]], pytest.param( - [None, None, -1, ["dcba", None]], + None, + None, + -1, + ["dcba", None], marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"), ), ], diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 872e913bcc4c1..24d6faa3693a2 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -396,7 +396,10 @@ def test_pipe_failures(any_string_dtype): (2, 5, None, ["foo", "bar", np.nan, "baz"]), (0, 3, -1, ["", "", np.nan, ""]), pytest.param( - (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), + None, + None, + -1, + ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"], marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"), ), (3, 10, 2, ["oto", "ato", np.nan, "aqx"]), From c20e80080eacd7c3599300b536b5a3deec22c63f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Sep 2024 07:43:18 -0700 Subject: [PATCH 07/15] Fix tests --- pandas/tests/strings/test_string_array.py | 7 ++++++- pandas/tests/strings/test_strings.py | 9 ++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index 0b3f368afea5e..6e8431016d868 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -2,6 +2,7 @@ import pytest from pandas._libs import lib +from pandas.compat import pa_version_under11p0 from pandas import ( NA, @@ -13,7 +14,7 @@ @pytest.mark.filterwarnings("ignore:Falling back") -def test_string_array(nullable_string_dtype, any_string_method): +def test_string_array(nullable_string_dtype, any_string_method, request): method_name, args, kwargs = any_string_method data = ["a", "bb", np.nan, "ccc"] @@ -25,6 +26,10 @@ def test_string_array(nullable_string_dtype, any_string_method): getattr(b.str, method_name)(*args, **kwargs) return + if b.dtype.storage == "pyarrow" and pa_version_under11p0 and method_name == "slice": + mark = pytest.mark.xfail(reason="Negative buffer resize") + request.applymarker(mark) + expected = getattr(a.str, method_name)(*args, **kwargs) result = getattr(b.str, method_name)(*args, **kwargs) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 24d6faa3693a2..c9195733d9dcf 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -395,19 +395,22 @@ def test_pipe_failures(any_string_dtype): [ (2, 5, None, ["foo", "bar", np.nan, "baz"]), (0, 3, -1, ["", "", np.nan, ""]), - pytest.param( + ( None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"], - marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"), ), (3, 10, 2, ["oto", "ato", np.nan, "aqx"]), (3, 0, -1, ["ofa", "aba", np.nan, "aba"]), ], ) -def test_slice(start, stop, step, expected, any_string_dtype): +def test_slice(start, stop, step, expected, any_string_dtype, request): ser = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"], dtype=any_string_dtype) + if any_string_dtype == "string[pyarrow]" and pa_version_under11p0: + mark = pytest.mark.xfail(reason="Empty result") + request.applymarker(mark) + result = ser.str.slice(start, stop, step) expected = Series(expected, dtype=any_string_dtype) tm.assert_series_equal(result, expected) From b6bcbfc37ac8b180647841715e9eefc0f683eb0b Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Sep 2024 12:48:22 -0700 Subject: [PATCH 08/15] Re-restore elementwise fallback --- pandas/core/arrays/arrow/array.py | 4 ++++ pandas/tests/extension/test_arrow.py | 3 +-- pandas/tests/strings/test_string_array.py | 5 ----- pandas/tests/strings/test_strings.py | 6 ------ 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 526342e084f70..05a17e1dff594 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2396,6 +2396,10 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self: def _str_slice( self, start: int | None = None, stop: int | None = None, step: int | None = None ) -> Self: + if pa_version_under11p0: + # GH#59724 + result = self._apply_elementwise(lambda val: val[start:stop:step]) + return type(self)(pa.chunked_array(result)) if start is None: if step is not None and step < 0: # GH#59710 diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 47a671c481da5..5c15a9f9e6346 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2036,12 +2036,11 @@ def test_str_join_string_type(): [None, 2, None, ["ab", None]], [None, 2, 1, ["ab", None]], [1, 3, 1, ["bc", None]], - pytest.param( + ( None, None, -1, ["dcba", None], - marks=pytest.mark.xfail(pa_version_under11p0, reason="Empty result"), ), ], ) diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index 6e8431016d868..352063318ff65 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -2,7 +2,6 @@ import pytest from pandas._libs import lib -from pandas.compat import pa_version_under11p0 from pandas import ( NA, @@ -26,10 +25,6 @@ def test_string_array(nullable_string_dtype, any_string_method, request): getattr(b.str, method_name)(*args, **kwargs) return - if b.dtype.storage == "pyarrow" and pa_version_under11p0 and method_name == "slice": - mark = pytest.mark.xfail(reason="Negative buffer resize") - request.applymarker(mark) - expected = getattr(a.str, method_name)(*args, **kwargs) result = getattr(b.str, method_name)(*args, **kwargs) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index c9195733d9dcf..ed80816054d43 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -6,8 +6,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under11p0 - from pandas import ( DataFrame, Index, @@ -407,10 +405,6 @@ def test_pipe_failures(any_string_dtype): ) def test_slice(start, stop, step, expected, any_string_dtype, request): ser = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"], dtype=any_string_dtype) - if any_string_dtype == "string[pyarrow]" and pa_version_under11p0: - mark = pytest.mark.xfail(reason="Empty result") - request.applymarker(mark) - result = ser.str.slice(start, stop, step) expected = Series(expected, dtype=any_string_dtype) tm.assert_series_equal(result, expected) From a0bb4400b5dd6219e7f5fc1d930aca798dcad71f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Sep 2024 13:27:07 -0700 Subject: [PATCH 09/15] Specify type for pa.chunked_array --- pandas/core/arrays/_arrow_string_mixins.py | 20 ++++++++++++++++++++ pandas/core/arrays/arrow/array.py | 19 ------------------- pandas/core/arrays/string_arrow.py | 2 +- pandas/tests/extension/test_arrow.py | 7 +------ pandas/tests/strings/test_string_array.py | 2 +- pandas/tests/strings/test_strings.py | 2 +- 6 files changed, 24 insertions(+), 28 deletions(-) diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index 950d4cd7cc92e..ad0efb7f64fb2 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -11,6 +11,7 @@ from pandas.compat import ( pa_version_under10p1, + pa_version_under11p0, pa_version_under13p0, pa_version_under17p0, ) @@ -103,6 +104,25 @@ def _str_get(self, i: int) -> Self: result = pc.if_else(not_out_of_bounds, selected, null_value) return type(self)(result) + def _str_slice( + self, start: int | None = None, stop: int | None = None, step: int | None = None + ) -> Self: + if pa_version_under11p0: + # GH#59724 + result = self._apply_elementwise(lambda val: val[start:stop:step]) + return type(self)(pa.chunked_array(result, type=self._pa_array.type)) + if start is None: + if step is not None and step < 0: + # GH#59710 + start = -1 + else: + start = 0 + if step is None: + step = 1 + return type(self)( + pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step) + ) + def _str_slice_replace( self, start: int | None = None, stop: int | None = None, repl: str | None = None ) -> Self: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 05a17e1dff594..8353ad53bb13c 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2393,25 +2393,6 @@ def _str_rpartition(self, sep: str, expand: bool) -> Self: result = self._apply_elementwise(predicate) return type(self)(pa.chunked_array(result)) - def _str_slice( - self, start: int | None = None, stop: int | None = None, step: int | None = None - ) -> Self: - if pa_version_under11p0: - # GH#59724 - result = self._apply_elementwise(lambda val: val[start:stop:step]) - return type(self)(pa.chunked_array(result)) - if start is None: - if step is not None and step < 0: - # GH#59710 - start = -1 - else: - start = 0 - if step is None: - step = 1 - return type(self)( - pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step) - ) - def _str_len(self) -> Self: return type(self)(pc.utf8_length(self._pa_array)) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 2e0c9b081f8df..5be293d73d446 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -293,7 +293,7 @@ def astype(self, dtype, copy: bool = True): _str_startswith = ArrowStringArrayMixin._str_startswith _str_endswith = ArrowStringArrayMixin._str_endswith _str_pad = ArrowStringArrayMixin._str_pad - _str_slice = ArrowExtensionArray._str_slice + _str_slice = ArrowStringArrayMixin._str_slice def _str_contains( self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 5c15a9f9e6346..f86d927ddda67 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2036,12 +2036,7 @@ def test_str_join_string_type(): [None, 2, None, ["ab", None]], [None, 2, 1, ["ab", None]], [1, 3, 1, ["bc", None]], - ( - None, - None, - -1, - ["dcba", None], - ), + (None, None, -1, ["dcba", None]), ], ) def test_str_slice(start, stop, step, exp): diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index 352063318ff65..0b3f368afea5e 100644 --- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -13,7 +13,7 @@ @pytest.mark.filterwarnings("ignore:Falling back") -def test_string_array(nullable_string_dtype, any_string_method, request): +def test_string_array(nullable_string_dtype, any_string_method): method_name, args, kwargs = any_string_method data = ["a", "bb", np.nan, "ccc"] diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index ed80816054d43..c101dd1e1d24b 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -403,7 +403,7 @@ def test_pipe_failures(any_string_dtype): (3, 0, -1, ["ofa", "aba", np.nan, "aba"]), ], ) -def test_slice(start, stop, step, expected, any_string_dtype, request): +def test_slice(start, stop, step, expected, any_string_dtype): ser = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"], dtype=any_string_dtype) result = ser.str.slice(start, stop, step) expected = Series(expected, dtype=any_string_dtype) From df55897212b815042537ae1c8822309d476b4532 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Sep 2024 14:18:14 -0700 Subject: [PATCH 10/15] Update pandas/tests/strings/test_strings.py Co-authored-by: Joris Van den Bossche --- pandas/tests/strings/test_strings.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index c101dd1e1d24b..7fd6685addd5e 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -393,12 +393,7 @@ def test_pipe_failures(any_string_dtype): [ (2, 5, None, ["foo", "bar", np.nan, "baz"]), (0, 3, -1, ["", "", np.nan, ""]), - ( - None, - None, - -1, - ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"], - ), + (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), (3, 10, 2, ["oto", "ato", np.nan, "aqx"]), (3, 0, -1, ["ofa", "aba", np.nan, "aba"]), ], From a493648fcb9821a89d689aeb1229e2cbf0419dd0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Sep 2024 16:13:56 -0700 Subject: [PATCH 11/15] mypy fixup --- pandas/core/arrays/_arrow_string_mixins.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index ad0efb7f64fb2..1525f10c3418c 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -23,10 +23,7 @@ import pyarrow.compute as pc if TYPE_CHECKING: - from collections.abc import ( - Callable, - Sized, - ) + from collections.abc import Callable from pandas._typing import ( Scalar, @@ -35,7 +32,7 @@ class ArrowStringArrayMixin: - _pa_array: Sized + _pa_array: pa.ChunkedArray def __init__(self, *args, **kwargs) -> None: raise NotImplementedError From 60e547a1988030d0a4f66f3fa7429f9b1de5003f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Sep 2024 17:58:33 -0700 Subject: [PATCH 12/15] mypy fixup --- pandas/core/arrays/_arrow_string_mixins.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py index 1525f10c3418c..32fa5e7c383b5 100644 --- a/pandas/core/arrays/_arrow_string_mixins.py +++ b/pandas/core/arrays/_arrow_string_mixins.py @@ -94,10 +94,7 @@ def _str_get(self, i: int) -> Self: selected = pc.utf8_slice_codeunits( self._pa_array, start=start, stop=stop, step=step ) - null_value = pa.scalar( - None, - type=self._pa_array.type, # type: ignore[attr-defined] - ) + null_value = pa.scalar(None, type=self._pa_array.type) result = pc.if_else(not_out_of_bounds, selected, null_value) return type(self)(result) From ae23647f45e09676908547117da6d25e2849a824 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 9 Sep 2024 18:59:35 -0700 Subject: [PATCH 13/15] lint fixup --- pandas/tests/strings/test_strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 7fd6685addd5e..1ce46497c3c22 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -393,7 +393,7 @@ def test_pipe_failures(any_string_dtype): [ (2, 5, None, ["foo", "bar", np.nan, "baz"]), (0, 3, -1, ["", "", np.nan, ""]), - (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), + (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), (3, 10, 2, ["oto", "ato", np.nan, "aqx"]), (3, 0, -1, ["ofa", "aba", np.nan, "aba"]), ], From 9bc03e31298482775ecbff574ca7289cfaeccb8b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 10 Sep 2024 08:27:27 +0200 Subject: [PATCH 14/15] Update doc/source/whatsnew/v2.3.0.rst --- doc/source/whatsnew/v2.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index da55e827246f1..03b3a6b55dff6 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -103,7 +103,7 @@ Conversion Strings ^^^^^^^ - Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`) -- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` giving incorrect results (:issue:`59710`) +- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`) - Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`) - From 169b51af5ee2084ae212e74815c6ed340766e932 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 10 Sep 2024 08:31:36 +0200 Subject: [PATCH 15/15] one additional test case --- pandas/tests/strings/test_strings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 1ce46497c3c22..4995b448f7e94 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -394,6 +394,7 @@ def test_pipe_failures(any_string_dtype): (2, 5, None, ["foo", "bar", np.nan, "baz"]), (0, 3, -1, ["", "", np.nan, ""]), (None, None, -1, ["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"]), + (None, 2, -1, ["owtoo", "owtra", np.nan, "xuqza"]), (3, 10, 2, ["oto", "ato", np.nan, "aqx"]), (3, 0, -1, ["ofa", "aba", np.nan, "aba"]), ],