Skip to content

add Series.str.remove(pre|suf)fix #43328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Sep 6, 2021
2 changes: 2 additions & 0 deletions doc/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,8 @@ strings and apply several methods to it. These can be accessed like
Series.str.normalize
Series.str.pad
Series.str.partition
Series.str.removeprefix
Series.str.removesuffix
Series.str.repeat
Series.str.replace
Series.str.rfind
Expand Down
15 changes: 15 additions & 0 deletions doc/source/user_guide/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,19 @@ regular expression object will raise a ``ValueError``.
---------------------------------------------------------------------------
ValueError: case and flags cannot be set when pat is a compiled regex

``removeprefix`` and ``removesuffix`` have the same effect as ``str.removeprefix`` and ``str.removesuffix`` `added in Python 3.9
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a versionadded 1.4. tag here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think you need this instead of the one on L349

<https://docs.python.org/3/library/stdtypes.html#str.removeprefix>`__:

.. ipython:: python

s = pd.Series(["str_foo", "str_bar", "no_prefix"])
s.str.removeprefix("str_")

s = pd.Series(["foo_str", "bar_str", "no_suffix"])
s.str.removesuffix("_str")

.. versionadded:: 1.4.0

.. _text.concatenate:

Concatenation
Expand Down Expand Up @@ -742,6 +755,8 @@ Method summary
:meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables
:meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex
:meth:`~Series.str.replace`;Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence
:meth:`~Series.str.removeprefix`;Remove prefix from string, i.e. only remove if string starts with prefix.
:meth:`~Series.str.removesuffix`;Remove suffix from string, i.e. only remove if string ends with suffix.
:meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
:meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings"
:meth:`~Series.str.center`;Equivalent to ``str.center``
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Other enhancements
- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
- :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)

.. ---------------------------------------------------------------------------

Expand Down
63 changes: 63 additions & 0 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1907,6 +1907,69 @@ def rstrip(self, to_strip=None):
result = self._data.array._str_rstrip(to_strip)
return self._wrap_result(result)

# Shared docstring template for the ``removeprefix`` / ``removesuffix``
# accessor methods. The ``%(side)s`` and ``%(other_side)s`` placeholders are
# substituted with "prefix"/"suffix" through ``%`` formatting at the point
# where the template is handed to each method's decorator.
_shared_docs[
"str_removefix"
] = r"""
Remove a %(side)s from an object series. If the %(side)s is not present,
the original string will be returned.

Parameters
----------
%(side)s : str
%(side)s to remove.

Returns
-------
Series/Index: object
The Series or Index with given %(side)s removed.

See Also
--------
Series.str.remove%(other_side)s : Remove a %(other_side)s from an object series.

Examples
--------
>>> s = pd.Series(["str_foo", "str_bar", "no_prefix"])
>>> s
0 str_foo
1 str_bar
2 no_prefix
dtype: object
>>> s.str.removeprefix("str_")
0 foo
1 bar
2 no_prefix
dtype: object

>>> s = pd.Series(["foo_str", "bar_str", "no_suffix"])
>>> s
0 foo_str
1 bar_str
2 no_suffix
dtype: object
>>> s.str.removesuffix("_str")
0 foo
1 bar
2 no_suffix
dtype: object
"""

@Appender(
    _shared_docs["str_removefix"] % dict(side="prefix", other_side="suffix")
)
@forbid_nonstring_types(["bytes"])
def removeprefix(self, prefix):
    # No inline docstring on purpose: the documentation text is the shared
    # "str_removefix" template passed to ``Appender`` above.
    # Delegate to the backing array, then wrap the raw result.
    return self._wrap_result(self._data.array._str_removeprefix(prefix))

@Appender(
    _shared_docs["str_removefix"] % dict(side="suffix", other_side="prefix")
)
@forbid_nonstring_types(["bytes"])
def removesuffix(self, suffix):
    # No inline docstring on purpose: the documentation text is the shared
    # "str_removefix" template passed to ``Appender`` above.
    # Delegate to the backing array, then wrap the raw result.
    return self._wrap_result(self._data.array._str_removesuffix(suffix))

@forbid_nonstring_types(["bytes"])
def wrap(self, width, **kwargs):
r"""
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/strings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
import abc
from collections.abc import Callable # noqa: PDF001
import re
from typing import TYPE_CHECKING

import numpy as np

from pandas._typing import Scalar

if TYPE_CHECKING:
from pandas import Series


class BaseStringArrayMethods(abc.ABC):
"""
Expand Down Expand Up @@ -223,6 +227,14 @@ def _str_lstrip(self, to_strip=None):
def _str_rstrip(self, to_strip=None):
pass

@abc.abstractmethod
def _str_removeprefix(self, prefix: str) -> Series:
    """
    Abstract hook: remove ``prefix`` from the elements of the array.

    This base declaration carries no implementation and returns ``None``
    if called directly.
    """

@abc.abstractmethod
def _str_removesuffix(self, suffix: str) -> Series:
    """
    Abstract hook: remove ``suffix`` from the elements of the array.

    This base declaration carries no implementation and returns ``None``
    if called directly.
    """

@abc.abstractmethod
def _str_split(self, pat=None, n=-1, expand=False):
pass
Expand Down
30 changes: 29 additions & 1 deletion pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections.abc import Callable # noqa: PDF001
import re
import textwrap
from typing import TYPE_CHECKING
import unicodedata

import numpy as np
Expand All @@ -20,6 +21,9 @@

from pandas.core.strings.base import BaseStringArrayMethods

if TYPE_CHECKING:
from pandas import Series


class ObjectStringArrayMixin(BaseStringArrayMethods):
"""
Expand All @@ -36,7 +40,7 @@ def _str_map(
self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True
):
"""
Map a callable over valid element of the array.
Map a callable over valid elements of the array.

Parameters
----------
Expand Down Expand Up @@ -414,6 +418,30 @@ def _str_lstrip(self, to_strip=None):
def _str_rstrip(self, to_strip=None):
    """
    Apply ``str.rstrip(to_strip)`` to each element through ``self._str_map``.
    """

    def _rstrip_one(value):
        return value.rstrip(to_strip)

    return self._str_map(_rstrip_one)

def _str_removeprefix(self, prefix: str) -> Series:
    """
    Drop ``prefix`` from the start of every element that begins with it.

    Elements that do not start with ``prefix`` are returned unchanged. The
    per-element function is applied through ``self._str_map``.
    """
    # Hand-written equivalent of ``str.removeprefix`` (Python 3.9+). Whether
    # to call the native method on 3.9+ is still an open question, see
    # https://git.io/JE9QK

    def _strip_leading(value: str) -> str:
        if not value.startswith(prefix):
            return value
        return value[len(prefix) :]

    return self._str_map(_strip_leading)

def _str_removesuffix(self, suffix: str) -> Series:
    """
    Drop ``suffix`` from the end of every element that ends with it.

    Elements that do not end with ``suffix`` are returned unchanged, and an
    empty ``suffix`` leaves every element untouched. The per-element
    function is applied through ``self._str_map``.

    Parameters
    ----------
    suffix : str
        Suffix to remove.
    """
    # Hand-written equivalent of ``str.removesuffix`` (Python 3.9+), i.e.
    # ``lambda x: x.removesuffix(suffix)``, kept so older interpreters work.

    def removesuffix(text: str) -> str:
        # The suffix must be non-empty: ``text.endswith("")`` is always True
        # and ``text[:-0]`` is ``text[:0]``, which would erase the string.
        if suffix and text.endswith(suffix):
            return text[: -len(suffix)]
        return text

    return self._str_map(removesuffix)

def _str_extract(self, pat: str, flags: int = 0, expand: bool = True):
regex = re.compile(pat, flags=flags)
na_value = self._str_na_value
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/strings/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
("startswith", ("a",), {}),
("startswith", ("a",), {"na": True}),
("startswith", ("a",), {"na": False}),
("removeprefix", ("a",), {}),
("removesuffix", ("a",), {}),
# translating unicode points of "a" to "d"
("translate", ({97: 100},), {}),
("wrap", (2,), {}),
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/strings/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,26 @@ def test_strip_lstrip_rstrip_args(any_string_dtype):
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "prefix, expected",
    [
        ("a", ["b", " b c", "bc"]),
        ("ab", ["", "a b c", "bc"]),
    ],
)
def test_removeprefix(any_string_dtype, prefix, expected):
    # Only elements that start with ``prefix`` lose it; the rest are unchanged.
    values = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
    tm.assert_series_equal(
        values.str.removeprefix(prefix),
        Series(expected, dtype=any_string_dtype),
    )


@pytest.mark.parametrize(
    "suffix, expected",
    [
        ("c", ["ab", "a b ", "b"]),
        ("bc", ["ab", "a b c", ""]),
    ],
)
def test_removesuffix(any_string_dtype, suffix, expected):
    # Only elements that end with ``suffix`` lose it; the rest are unchanged.
    values = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
    tm.assert_series_equal(
        values.str.removesuffix(suffix),
        Series(expected, dtype=any_string_dtype),
    )


def test_string_slice_get_syntax(any_string_dtype):
ser = Series(
["YYY", "B", "C", "YYYYYYbYYY", "BYYYcYYY", np.nan, "CYYYBYYY", "dog", "cYYYt"],
Expand Down