diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 799bc88ffff4e..3349fb6864de4 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -59,6 +59,7 @@ Other enhancements
 - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
 - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
 - Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`)
+- Added :meth:`Series.str.removeprefix` to remove a prefix from each string in a Series, with the same functionality as ``str.removeprefix`` from the Python standard library (:issue:`36944`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 0e6ffa637f1ae..d4b464ff2ce81 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -2881,6 +2881,56 @@ def casefold(self):
         "isdecimal", docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"]
     )
 
+    @forbid_nonstring_types(["bytes"])
+    def removeprefix(self, prefix: str = None):
+        """
+        Remove a prefix from each string in the Series/Index.
+
+        If a string does not start with ``prefix``, it is returned unchanged.
+        Equivalent to ``str.removeprefix`` from the Python standard library.
+
+        Parameters
+        ----------
+        prefix : str
+            Prefix to remove. Must be passed explicitly; the ``None`` default
+            is not a valid prefix.
+
+        Returns
+        -------
+        Series or Index of object
+            The Series or Index with the given prefix removed.
+
+        Raises
+        ------
+        TypeError
+            If ``prefix`` is not a string.
+
+        See Also
+        --------
+        Series.str.strip : Remove leading and trailing characters.
+
+        Examples
+        --------
+        >>> s = pd.Series(["str_string1", "str_string2", "no_prefix"])
+        >>> s
+        0    str_string1
+        1    str_string2
+        2      no_prefix
+        dtype: object
+
+        >>> s.str.removeprefix("str_")
+        0      string1
+        1      string2
+        2    no_prefix
+        dtype: object
+        """
+        # Fail early with one clear message instead of erroring per element.
+        if not isinstance(prefix, str):
+            msg = f"prefix must be a string, not {type(prefix).__name__}"
+            raise TypeError(msg)
+        result = self._data.array._str_removeprefix(prefix)
+        return self._wrap_result(result)
+
 
 def cat_safe(list_of_columns: List, sep: str):
     """
diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py
index 08064244a2ff9..4842d388897db 100644
--- a/pandas/core/strings/base.py
+++ b/pandas/core/strings/base.py
@@ -223,3 +223,7 @@ def _str_split(self, pat=None, n=-1, expand=False):
     @abc.abstractmethod
     def _str_rsplit(self, pat=None, n=-1):
         pass
+
+    @abc.abstractmethod
+    def _str_removeprefix(self, prefix=None):
+        pass
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 471f1e521b991..4e7d474693dce 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -430,3 +430,13 @@ def _str_lstrip(self, to_strip=None):
 
     def _str_rstrip(self, to_strip=None):
         return self._str_map(lambda x: x.rstrip(to_strip))
+
+    def _str_removeprefix(self, prefix=None):
+        # Map in a single pass through ``_str_map``, like the sibling
+        # ``_str_*`` methods, so its result handling applies here too.
+        def removeprefix(text):
+            if text.startswith(prefix):
+                return text[len(prefix) :]
+            return text
+
+        return self._str_map(removeprefix)
diff --git a/pandas/tests/strings/conftest.py b/pandas/tests/strings/conftest.py
index 4fedbee91f649..1d27b73ab55f1 100644
--- a/pandas/tests/strings/conftest.py
+++ b/pandas/tests/strings/conftest.py
@@ -50,6 +50,7 @@
     ("translate", ({97: 100},), {}),
     ("wrap", (2,), {}),
     ("zfill", (10,), {}),
+    ("removeprefix", ("a ",), {}),
 ] + list(
     zip(
         [
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 92e7bf258d2d7..6849a64b7fdcc 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -767,3 +767,15 @@ def test_str_accessor_in_apply_func():
     expected = Series(["A/D", "B/E", "C/F"])
     result = df.apply(lambda f: "/".join(f.str.upper()), axis=1)
     tm.assert_series_equal(result, expected)
+
+
+def test_str_removeprefix():
+    # https://github.com/pandas-dev/pandas/issues/36944
+    df = DataFrame(
+        {"A": ["str_string1", "str_string2", "str_string3", "string_no_prefix"]}
+    )
+    expected = DataFrame({"A": ["string1", "string2", "string3", "string_no_prefix"]})
+
+    df["A"] = df["A"].str.removeprefix("str_")
+
+    tm.assert_frame_equal(df, expected)