Skip to content

Commit aabf659

Browse files
Backport PR #48587 on branch 1.5.x (Fix series.str.startswith(tuple)) (#48593)
Backport PR #48587: Fix `series.str.startswith(tuple)`. Co-authored-by: Janosh Riebesell <[email protected]>
1 parent dfc00bf commit aabf659

File tree

3 files changed

+40
-17
lines changed

3 files changed

+40
-17
lines changed

pandas/core/strings/accessor.py

+31-10
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from pandas._typing import (
1919
DtypeObj,
2020
F,
21+
Scalar,
2122
)
2223
from pandas.util._decorators import (
2324
Appender,
@@ -2287,16 +2288,19 @@ def count(self, pat, flags=0):
22872288
return self._wrap_result(result, returns_string=False)
22882289

22892290
@forbid_nonstring_types(["bytes"])
2290-
def startswith(self, pat, na=None):
2291+
def startswith(
2292+
self, pat: str | tuple[str, ...], na: Scalar | None = None
2293+
) -> Series | Index:
22912294
"""
22922295
Test if the start of each string element matches a pattern.
22932296
22942297
Equivalent to :meth:`str.startswith`.
22952298
22962299
Parameters
22972300
----------
2298-
pat : str
2299-
Character sequence. Regular expressions are not accepted.
2301+
pat : str or tuple[str, ...]
2302+
Character sequence or tuple of strings. Regular expressions are not
2303+
accepted.
23002304
na : object, default NaN
23012305
Object shown if element tested is not a string. The default depends
23022306
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
@@ -2331,6 +2335,13 @@ def startswith(self, pat, na=None):
23312335
3 NaN
23322336
dtype: object
23332337
2338+
>>> s.str.startswith(('b', 'B'))
2339+
0 True
2340+
1 True
2341+
2 False
2342+
3 NaN
2343+
dtype: object
2344+
23342345
Specifying `na` to be `False` instead of `NaN`.
23352346
23362347
>>> s.str.startswith('b', na=False)
@@ -2340,23 +2351,26 @@ def startswith(self, pat, na=None):
23402351
3 False
23412352
dtype: bool
23422353
"""
2343-
if not isinstance(pat, str):
2344-
msg = f"expected a string object, not {type(pat).__name__}"
2354+
if not isinstance(pat, (str, tuple)):
2355+
msg = f"expected a string or tuple, not {type(pat).__name__}"
23452356
raise TypeError(msg)
23462357
result = self._data.array._str_startswith(pat, na=na)
23472358
return self._wrap_result(result, returns_string=False)
23482359

23492360
@forbid_nonstring_types(["bytes"])
2350-
def endswith(self, pat, na=None):
2361+
def endswith(
2362+
self, pat: str | tuple[str, ...], na: Scalar | None = None
2363+
) -> Series | Index:
23512364
"""
23522365
Test if the end of each string element matches a pattern.
23532366
23542367
Equivalent to :meth:`str.endswith`.
23552368
23562369
Parameters
23572370
----------
2358-
pat : str
2359-
Character sequence. Regular expressions are not accepted.
2371+
pat : str or tuple[str, ...]
2372+
Character sequence or tuple of strings. Regular expressions are not
2373+
accepted.
23602374
na : object, default NaN
23612375
Object shown if element tested is not a string. The default depends
23622376
on dtype of the array. For object-dtype, ``numpy.nan`` is used.
@@ -2391,6 +2405,13 @@ def endswith(self, pat, na=None):
23912405
3 NaN
23922406
dtype: object
23932407
2408+
>>> s.str.endswith(('t', 'T'))
2409+
0 True
2410+
1 False
2411+
2 True
2412+
3 NaN
2413+
dtype: object
2414+
23942415
Specifying `na` to be `False` instead of `NaN`.
23952416
23962417
>>> s.str.endswith('t', na=False)
@@ -2400,8 +2421,8 @@ def endswith(self, pat, na=None):
24002421
3 False
24012422
dtype: bool
24022423
"""
2403-
if not isinstance(pat, str):
2404-
msg = f"expected a string object, not {type(pat).__name__}"
2424+
if not isinstance(pat, (str, tuple)):
2425+
msg = f"expected a string or tuple, not {type(pat).__name__}"
24052426
raise TypeError(msg)
24062427
result = self._data.array._str_endswith(pat, na=na)
24072428
return self._wrap_result(result, returns_string=False)

pandas/tests/strings/test_find_replace.py

+8-6
Original file line number | Diff line number | Diff line change
@@ -291,21 +291,22 @@ def test_contains_nan(any_string_dtype):
291291
# --------------------------------------------------------------------------------------
292292

293293

294+
@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
294295
@pytest.mark.parametrize("dtype", [None, "category"])
295296
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
296297
@pytest.mark.parametrize("na", [True, False])
297-
def test_startswith(dtype, null_value, na):
298+
def test_startswith(pat, dtype, null_value, na):
298299
# add category dtype parametrizations for GH-36241
299300
values = Series(
300301
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
301302
dtype=dtype,
302303
)
303304

304-
result = values.str.startswith("foo")
305+
result = values.str.startswith(pat)
305306
exp = Series([False, np.nan, True, False, False, np.nan, True])
306307
tm.assert_series_equal(result, exp)
307308

308-
result = values.str.startswith("foo", na=na)
309+
result = values.str.startswith(pat, na=na)
309310
exp = Series([False, na, True, False, False, na, True])
310311
tm.assert_series_equal(result, exp)
311312

@@ -351,21 +352,22 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na):
351352
# --------------------------------------------------------------------------------------
352353

353354

355+
@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
354356
@pytest.mark.parametrize("dtype", [None, "category"])
355357
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
356358
@pytest.mark.parametrize("na", [True, False])
357-
def test_endswith(dtype, null_value, na):
359+
def test_endswith(pat, dtype, null_value, na):
358360
# add category dtype parametrizations for GH-36241
359361
values = Series(
360362
["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"],
361363
dtype=dtype,
362364
)
363365

364-
result = values.str.endswith("foo")
366+
result = values.str.endswith(pat)
365367
exp = Series([False, np.nan, False, False, True, np.nan, True])
366368
tm.assert_series_equal(result, exp)
367369

368-
result = values.str.endswith("foo", na=na)
370+
result = values.str.endswith(pat, na=na)
369371
exp = Series([False, na, False, False, True, na, True])
370372
tm.assert_series_equal(result, exp)
371373

pandas/tests/strings/test_strings.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
def test_startswith_endswith_non_str_patterns(pattern):
2727
# GH3485
2828
ser = Series(["foo", "bar"])
29-
msg = f"expected a string object, not {type(pattern).__name__}"
29+
msg = f"expected a string or tuple, not {type(pattern).__name__}"
3030
with pytest.raises(TypeError, match=msg):
3131
ser.str.startswith(pattern)
3232
with pytest.raises(TypeError, match=msg):

0 commit comments

Comments
 (0)