diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 7e8403c94ceef..3597fd09f219b 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -158,7 +158,7 @@ Conversion
 
 Strings
 ^^^^^^^
--
+- :meth:`Series.str.replace` now accepts a dictionary of patterns and their replacements through the new ``pat_dict`` argument (:issue:`51914`)
 -
 
 Interval
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 1b020a3d96411..b20edbfb21bed 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -1314,12 +1314,13 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None):
     @forbid_nonstring_types(["bytes"])
     def replace(
         self,
-        pat: str | re.Pattern,
-        repl: str | Callable,
+        pat: str | re.Pattern = "",
+        repl: str | Callable = "",
         n: int = -1,
         case: bool | None = None,
         flags: int = 0,
         regex: bool = False,
+        pat_dict: dict | None = None,
     ):
         r"""
         Replace each occurrence of pattern/regex in the Series/Index.
@@ -1354,6 +1355,10 @@ def replace(
             - If False, treats the pattern as a literal string
             - Cannot be set to False if `pat` is a compiled regex or `repl` is
               a callable.
+        pat_dict : dict, optional
+            Mapping of patterns to their replacements. When given, the pairs
+            are applied one after another in insertion order, replacement
+            values are converted with ``str``, and `pat` and `repl` are ignored.
 
         Returns
         -------
@@ -1459,6 +1464,22 @@ def replace(
         if case is None:
             case = True
 
+        if not pat and not pat_dict:
+            raise ValueError(
+                "Cannot replace a string without specifying a string to be modified."
+            )
+
+        if pat_dict:
+            # Apply the pairs one after another, in the dictionary's order;
+            # each pass operates on the output of the previous one.
+            res_output = self._data
+            for key, value in pat_dict.items():
+                result = res_output.array._str_replace(
+                    key, str(value), n=n, case=case, flags=flags, regex=regex
+                )
+                res_output = self._wrap_result(result)
+            return res_output
+
         result = self._data.array._str_replace(
             pat, repl, n=n, case=case, flags=flags, regex=regex
         )
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 6f6acb7a996b2..dc842610d3fa7 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -639,6 +639,37 @@ def test_replace_regex_single_character(regex, any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
+def test_replace_dict(any_string_dtype):
+    # New replace behavior introduced in #51914
+    series = Series(
+        ["A", "B_junk", "C_gunk"], name="my_messy_col", dtype=any_string_dtype
+    )
+    result = series.str.replace(pat_dict={"_gunk": "_junk"})
+    expected = Series(
+        ["A", "B_junk", "C_junk"], name="my_messy_col", dtype=any_string_dtype
+    )
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_multi_dict(any_string_dtype):
+    # New replace behavior introduced in #51914
+    series = Series(["A", "B", "C"], name="my_messy_col", dtype=any_string_dtype)
+    result = series.str.replace(pat_dict={"A": "", "B": 1})
+    expected = Series(["", "1", "C"], name="my_messy_col", dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_dict_invalid(any_string_dtype):
+    # New replace behavior introduced in #51914
+    msg = "Cannot replace a string without specifying a string to be modified."
+    series = Series(
+        ["A", "B_junk", "C_gunk"], name="my_messy_col", dtype=any_string_dtype
+    )
+
+    with pytest.raises(ValueError, match=msg):
+        series.str.replace()
+
+
 # --------------------------------------------------------------------------------------
 # str.match
 # --------------------------------------------------------------------------------------