Skip to content

Commit 2b6af64

Browse files
ChronoJon and mroeschke authored
BUG fix for str.startswith and str.endswith with tuple arg for "string[pyarrow]" dtype (GH#54942) (#54943)
* TST: added failing tuple tests for `str.startswith` and `str.endswith` (#54942)
* FIX: added fix for `str.startswith` and `str.endswith` with tuple arg for `"string[pyarrow]"` dtype (GH#54942)
* DOCS: added entry to whatsnew
* DOCS: moved contrib note to `Strings` section and fixed typos
* PERF: exchanged boolean array creation to method with less overhead
* CLN: removed unnecessary type checks
* DOCS: removed extra space before GH reference

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent f64c608 commit 2b6af64

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ Conversion
332332
Strings
333333
^^^^^^^
334334
- Bug in :func:`pandas.api.types.is_string_dtype` while checking object array with no elements is of the string dtype (:issue:`54661`)
335-
-
335+
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with arguments of type ``tuple[str, ...]`` for ``string[pyarrow]`` dtype (:issue:`54942`)
336336
-
337337

338338
Interval

pandas/core/arrays/string_arrow.py

+30-4
Original file line numberDiff line numberDiff line change
@@ -336,14 +336,40 @@ def _str_contains(
336336
result[isna(result)] = bool(na)
337337
return result
338338

339-
def _str_startswith(self, pat: str, na=None):
340-
result = pc.starts_with(self._pa_array, pattern=pat)
339+
def _str_startswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
340+
if isinstance(pat, str):
341+
result = pc.starts_with(self._pa_array, pattern=pat)
342+
else:
343+
if len(pat) == 0:
344+
# mimic existing behaviour of string extension array
345+
# and python string method
346+
result = pa.array(
347+
np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
348+
)
349+
else:
350+
result = pc.starts_with(self._pa_array, pattern=pat[0])
351+
352+
for p in pat[1:]:
353+
result = pc.or_(result, pc.starts_with(self._pa_array, pattern=p))
341354
if not isna(na):
342355
result = result.fill_null(na)
343356
return self._result_converter(result)
344357

345-
def _str_endswith(self, pat: str, na=None):
346-
result = pc.ends_with(self._pa_array, pattern=pat)
358+
def _str_endswith(self, pat: str | tuple[str, ...], na: Scalar | None = None):
359+
if isinstance(pat, str):
360+
result = pc.ends_with(self._pa_array, pattern=pat)
361+
else:
362+
if len(pat) == 0:
363+
# mimic existing behaviour of string extension array
364+
# and python string method
365+
result = pa.array(
366+
np.zeros(len(self._pa_array), dtype=bool), mask=isna(self._pa_array)
367+
)
368+
else:
369+
result = pc.ends_with(self._pa_array, pattern=pat[0])
370+
371+
for p in pat[1:]:
372+
result = pc.or_(result, pc.ends_with(self._pa_array, pattern=p))
347373
if not isna(na):
348374
result = result.fill_null(na)
349375
return self._result_converter(result)

pandas/tests/strings/conftest.py

+8
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
("decode", ("UTF-8",), {}),
1414
("encode", ("UTF-8",), {}),
1515
("endswith", ("a",), {}),
16+
("endswith", ((),), {}),
17+
("endswith", (("a",),), {}),
18+
("endswith", (("a", "b"),), {}),
19+
("endswith", (("a", "MISSING"),), {}),
1620
("endswith", ("a",), {"na": True}),
1721
("endswith", ("a",), {"na": False}),
1822
("extract", ("([a-z]*)",), {"expand": False}),
@@ -44,6 +48,10 @@
4448
("split", (" ",), {"expand": False}),
4549
("split", (" ",), {"expand": True}),
4650
("startswith", ("a",), {}),
51+
("startswith", (("a",),), {}),
52+
("startswith", (("a", "b"),), {}),
53+
("startswith", (("a", "MISSING"),), {}),
54+
("startswith", ((),), {}),
4755
("startswith", ("a",), {"na": True}),
4856
("startswith", ("a",), {"na": False}),
4957
("removeprefix", ("a",), {}),

0 commit comments

Comments
 (0)