Skip to content

Commit 0a9f9ee

Browse files
authored
add Series.str.remove(pre|suf)fix (#43328)
1 parent daaf286 commit 0a9f9ee

File tree

8 files changed

+144
-1
lines changed

8 files changed

+144
-1
lines changed

doc/source/reference/series.rst

+2
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,8 @@ strings and apply several methods to it. These can be accessed like
427427
Series.str.normalize
428428
Series.str.pad
429429
Series.str.partition
430+
Series.str.removeprefix
431+
Series.str.removesuffix
430432
Series.str.repeat
431433
Series.str.replace
432434
Series.str.rfind

doc/source/user_guide/text.rst

+15
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,19 @@ regular expression object will raise a ``ValueError``.
335335
---------------------------------------------------------------------------
336336
ValueError: case and flags cannot be set when pat is a compiled regex
337337

338+
``removeprefix`` and ``removesuffix`` have the same effect as ``str.removeprefix`` and ``str.removesuffix`` added in `Python 3.9
339+
<https://docs.python.org/3/library/stdtypes.html#str.removeprefix>`__:
340+
341+
.. versionadded:: 1.4.0
342+
343+
.. ipython:: python
344+
345+
s = pd.Series(["str_foo", "str_bar", "no_prefix"])
346+
s.str.removeprefix("str_")
347+
348+
s = pd.Series(["foo_str", "bar_str", "no_suffix"])
349+
s.str.removesuffix("_str")
350+
338351
.. _text.concatenate:
339352

340353
Concatenation
@@ -742,6 +755,8 @@ Method summary
742755
:meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables
743756
:meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex
744757
:meth:`~Series.str.replace`;Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence
758+
:meth:`~Series.str.removeprefix`;Remove prefix from string, i.e. only remove if string starts with prefix.
759+
:meth:`~Series.str.removesuffix`;Remove suffix from string, i.e. only remove if string ends with suffix.
745760
:meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
746761
:meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings"
747762
:meth:`~Series.str.center`;Equivalent to ``str.center``

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ Other enhancements
104104
- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
105105
- :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
106106
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
107+
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
107108

108109
.. ---------------------------------------------------------------------------
109110

pandas/core/strings/accessor.py

+63
Original file line numberDiff line numberDiff line change
@@ -1907,6 +1907,69 @@ def rstrip(self, to_strip=None):
19071907
result = self._data.array._str_rstrip(to_strip)
19081908
return self._wrap_result(result)
19091909

1910+
# Shared docstring template for ``removeprefix`` / ``removesuffix``.
# ``%(side)s`` and ``%(other_side)s`` are substituted with "prefix"/"suffix"
# where the template is passed to ``Appender`` on the two methods below.
_shared_docs[
1911+
"str_removefix"
1912+
] = r"""
1913+
Remove a %(side)s from an object series. If the %(side)s is not present,
1914+
the original string will be returned.
1915+
1916+
Parameters
1917+
----------
1918+
%(side)s : str
1919+
%(side)s to remove.
1920+
1921+
Returns
1922+
-------
1923+
Series/Index: object
1924+
The Series or Index with given %(side)s removed.
1925+
1926+
See Also
1927+
--------
1928+
Series.str.remove%(other_side)s : Remove a %(other_side)s from an object series.
1929+
1930+
Examples
1931+
--------
1932+
>>> s = pd.Series(["str_foo", "str_bar", "no_prefix"])
1933+
>>> s
1934+
0 str_foo
1935+
1 str_bar
1936+
2 no_prefix
1937+
dtype: object
1938+
>>> s.str.removeprefix("str_")
1939+
0 foo
1940+
1 bar
1941+
2 no_prefix
1942+
dtype: object
1943+
1944+
>>> s = pd.Series(["foo_str", "bar_str", "no_suffix"])
1945+
>>> s
1946+
0 foo_str
1947+
1 bar_str
1948+
2 no_suffix
1949+
dtype: object
1950+
>>> s.str.removesuffix("_str")
1951+
0 foo
1952+
1 bar
1953+
2 no_suffix
1954+
dtype: object
1955+
"""
1956+
1957+
@Appender(
    _shared_docs["str_removefix"] % {"side": "prefix", "other_side": "suffix"}
)
@forbid_nonstring_types(["bytes"])
def removeprefix(self, prefix):
    # No inline docstring on purpose: the user-facing documentation is
    # supplied by the ``Appender`` decorator from the shared template.
    # The work is delegated to the backing array; the raw result is then
    # passed through ``_wrap_result`` before being returned.
    stripped = self._data.array._str_removeprefix(prefix)
    return self._wrap_result(stripped)
1964+
1965+
@Appender(
    _shared_docs["str_removefix"] % {"side": "suffix", "other_side": "prefix"}
)
@forbid_nonstring_types(["bytes"])
def removesuffix(self, suffix):
    # No inline docstring on purpose: the user-facing documentation is
    # supplied by the ``Appender`` decorator from the shared template.
    # The work is delegated to the backing array; the raw result is then
    # passed through ``_wrap_result`` before being returned.
    stripped = self._data.array._str_removesuffix(suffix)
    return self._wrap_result(stripped)
1972+
19101973
@forbid_nonstring_types(["bytes"])
19111974
def wrap(self, width, **kwargs):
19121975
r"""

pandas/core/strings/base.py

+12
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@
33
import abc
44
from collections.abc import Callable # noqa: PDF001
55
import re
6+
from typing import TYPE_CHECKING
67

78
import numpy as np
89

910
from pandas._typing import Scalar
1011

12+
if TYPE_CHECKING:
13+
from pandas import Series
14+
1115

1216
class BaseStringArrayMethods(abc.ABC):
1317
"""
@@ -223,6 +227,14 @@ def _str_lstrip(self, to_strip=None):
223227
def _str_rstrip(self, to_strip=None):
224228
pass
225229

230+
@abc.abstractmethod
def _str_removeprefix(self, prefix: str) -> Series:
    """
    Abstract hook for removing ``prefix`` from the array's string elements.

    Concrete string-array classes must override this; the base
    implementation does nothing and returns ``None``.

    Parameters
    ----------
    prefix : str
        The prefix to remove.
    """
233+
234+
@abc.abstractmethod
def _str_removesuffix(self, suffix: str) -> Series:
    """
    Abstract hook for removing ``suffix`` from the array's string elements.

    Concrete string-array classes must override this; the base
    implementation does nothing and returns ``None``.

    Parameters
    ----------
    suffix : str
        The suffix to remove.
    """
237+
226238
@abc.abstractmethod
227239
def _str_split(self, pat=None, n=-1, expand=False):
228240
pass

pandas/core/strings/object_array.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from collections.abc import Callable # noqa: PDF001
44
import re
55
import textwrap
6+
from typing import TYPE_CHECKING
67
import unicodedata
78

89
import numpy as np
@@ -20,6 +21,9 @@
2021

2122
from pandas.core.strings.base import BaseStringArrayMethods
2223

24+
if TYPE_CHECKING:
25+
from pandas import Series
26+
2327

2428
class ObjectStringArrayMixin(BaseStringArrayMethods):
2529
"""
@@ -36,7 +40,7 @@ def _str_map(
3640
self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
3741
):
3842
"""
39-
Map a callable over valid element of the array.
43+
Map a callable over valid elements of the array.
4044
4145
Parameters
4246
----------
@@ -414,6 +418,30 @@ def _str_lstrip(self, to_strip=None):
414418
def _str_rstrip(self, to_strip=None):
    """
    Apply ``str.rstrip(to_strip)`` to the elements via ``_str_map``.

    Parameters
    ----------
    to_strip : optional
        Forwarded unchanged to ``rstrip`` for each mapped element.

    Returns
    -------
    Whatever ``self._str_map`` returns for the mapped callable.
    """

    def _rstrip_one(text):
        return text.rstrip(to_strip)

    return self._str_map(_rstrip_one)
416420

421+
def _str_removeprefix(self, prefix: str) -> Series:
    """
    Drop ``prefix`` from the start of each element that begins with it.

    Elements that do not start with ``prefix`` are passed through unchanged.

    Parameters
    ----------
    prefix : str
        The prefix to remove.

    Returns
    -------
    Whatever ``self._str_map`` returns for the mapped callable.
    NOTE(review): the annotation says ``Series``; confirm against
    ``_str_map``'s actual return type.
    """
    # Open question (https://git.io/JE9QK): on Python 3.9+ the built-in
    # ``str.removeprefix`` could be mapped instead of this hand-rolled helper.

    def _strip_leading(text: str) -> str:
        # Slice past the prefix only when it is actually present.
        return text[len(prefix) :] if text.startswith(prefix) else text

    return self._str_map(_strip_leading)
432+
433+
def _str_removesuffix(self, suffix: str) -> Series:
    """
    Drop ``suffix`` from the end of each element that ends with it.

    Elements that do not end with ``suffix`` are passed through unchanged.
    An empty ``suffix`` leaves every element unchanged, matching the
    built-in ``str.removesuffix`` from Python 3.9.

    Parameters
    ----------
    suffix : str
        The suffix to remove.

    Returns
    -------
    Whatever ``self._str_map`` returns for the mapped callable.
    NOTE(review): the annotation says ``Series``; confirm against
    ``_str_map``'s actual return type.
    """
    # On Python 3.9+ the built-in ``str.removesuffix`` could be mapped
    # instead of this hand-rolled helper.

    def removesuffix(text: str) -> str:
        if text.endswith(suffix):
            # Compute the end index explicitly instead of slicing with
            # ``-len(suffix)``: for an empty suffix ``text[:-0]`` is
            # ``text[:0]``, which would wrongly return "" for every element.
            return text[: len(text) - len(suffix)]
        return text

    return self._str_map(removesuffix)
444+
417445
def _str_extract(self, pat: str, flags: int = 0, expand: bool = True):
418446
regex = re.compile(pat, flags=flags)
419447
na_value = self._str_na_value

pandas/tests/strings/conftest.py

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
("startswith", ("a",), {}),
4747
("startswith", ("a",), {"na": True}),
4848
("startswith", ("a",), {"na": False}),
49+
("removeprefix", ("a",), {}),
50+
("removesuffix", ("a",), {}),
4951
# translating unicode points of "a" to "d"
5052
("translate", ({97: 100},), {}),
5153
("wrap", (2,), {}),

pandas/tests/strings/test_strings.py

+20
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,26 @@ def test_strip_lstrip_rstrip_args(any_string_dtype):
535535
tm.assert_series_equal(result, expected)
536536

537537

538+
@pytest.mark.parametrize(
    "prefix, expected",
    [
        ("a", ["b", " b c", "bc"]),
        ("ab", ["", "a b c", "bc"]),
    ],
)
def test_removeprefix(any_string_dtype, prefix, expected):
    """Check ``Series.str.removeprefix`` with the ``any_string_dtype`` fixture."""
    # Only elements that start with ``prefix`` lose it; the rest are untouched.
    values = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
    want = Series(expected, dtype=any_string_dtype)
    tm.assert_series_equal(values.str.removeprefix(prefix), want)
546+
547+
548+
@pytest.mark.parametrize(
    "suffix, expected",
    [
        ("c", ["ab", "a b ", "b"]),
        ("bc", ["ab", "a b c", ""]),
    ],
)
def test_removesuffix(any_string_dtype, suffix, expected):
    """Check ``Series.str.removesuffix`` with the ``any_string_dtype`` fixture."""
    # Only elements that end with ``suffix`` lose it; the rest are untouched.
    values = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
    want = Series(expected, dtype=any_string_dtype)
    tm.assert_series_equal(values.str.removesuffix(suffix), want)
556+
557+
538558
def test_string_slice_get_syntax(any_string_dtype):
539559
ser = Series(
540560
["YYY", "B", "C", "YYYYYYbYYY", "BYYYcYYY", np.nan, "CYYYBYYY", "dog", "cYYYt"],

0 commit comments

Comments
 (0)