Skip to content

ENH: Adding implementation for issue #51748 #51919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ Conversion

Strings
^^^^^^^
-
- :meth:`Series.str.replace` now accepts a dictionary through the new ``pat_dict`` argument, where each key is a string to be replaced and each value is its replacement (:issue:`51748`)
-

Interval
Expand Down
31 changes: 25 additions & 6 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1314,12 +1314,13 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None):
@forbid_nonstring_types(["bytes"])
def replace(
self,
pat: str | re.Pattern,
repl: str | Callable,
pat: str | re.Pattern = "",
repl: str | Callable = "",
n: int = -1,
case: bool | None = None,
flags: int = 0,
regex: bool = False,
pat_dict: dict = dict(),
):
r"""
Replace each occurrence of pattern/regex in the Series/Index.
Expand Down Expand Up @@ -1354,6 +1355,8 @@ def replace(
- If False, treats the pattern as a literal string
- Cannot be set to False if `pat` is a compiled regex or `repl` is
a callable.
pat_dict : dict, default empty dictionary
<key:value> pairs of strings being replaced, and their updated values.

Returns
-------
Expand Down Expand Up @@ -1459,10 +1462,26 @@ def replace(
if case is None:
case = True

result = self._data.array._str_replace(
pat, repl, n=n, case=case, flags=flags, regex=regex
)
return self._wrap_result(result)
if not pat and not pat_dict:
raise ValueError(
"Cannot replace a string without specifying a string to be modified."
)

if pat_dict:
res_output = self._data
for key, value in pat_dict.items():
result = res_output.array._str_replace(
key, str(value), n=n, case=case, flags=flags, regex=regex
)
res_output = self._wrap_result(result)

else:
result = self._data.array._str_replace(
pat, repl, n=n, case=case, flags=flags, regex=regex
)
res_output = self._wrap_result(result)

return res_output

@forbid_nonstring_types(["bytes"])
def repeat(self, repeats):
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,31 @@ def test_replace_regex_single_character(regex, any_string_dtype):
tm.assert_series_equal(result, expected)


def test_replace_dict(any_string_dtype):
    # GH#51748: a single-entry ``pat_dict`` replaces the key with its value.
    # Input and expected are both built with the fixture dtype so that each
    # string-dtype parametrization is actually exercised (previously the
    # fixture was requested but never used).
    series = Series(
        data=["A", "B_junk", "C_gunk"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    new_series1 = series.str.replace(pat_dict={"_gunk": "_junk"})
    expected1 = Series(
        data=["A", "B_junk", "C_junk"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    tm.assert_series_equal(new_series1, expected1)


def test_replace_multi_dict(any_string_dtype):
    # GH#51748: several ``pat_dict`` entries are applied to the same Series,
    # and a non-string replacement value (``1``) ends up as its string form
    # (``"1"``) in the result.
    # Use the fixture dtype for input and expected so that each string-dtype
    # parametrization is actually exercised.
    series = Series(
        data=["A", "B", "C"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    new_series = series.str.replace(pat_dict={"A": "", "B": 1})
    expected = Series(
        data=["", "1", "C"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )
    tm.assert_series_equal(new_series, expected)


def test_replace_dict_invalid(any_string_dtype):
    # GH#51748: calling ``str.replace`` with neither ``pat`` nor ``pat_dict``
    # must raise, since there is nothing to replace.
    msg = "Cannot replace a string without specifying a string to be modified."
    # Build the Series with the fixture dtype so the error path is checked
    # for every string-dtype parametrization.
    series = Series(
        data=["A", "B_junk", "C_gunk"],
        name="my_messy_col",
        dtype=any_string_dtype,
    )

    with pytest.raises(ValueError, match=msg):
        series.str.replace()


# --------------------------------------------------------------------------------------
# str.match
# --------------------------------------------------------------------------------------
Expand Down