Skip to content

Commit 8871b1c

Browse files
authored
ENH: Allow dictionaries to be passed to pandas.Series.str.replace (pandas-dev#56175)
1 parent dfb3f6c commit 8871b1c

File tree

3 files changed

+47
-8
lines changed

3 files changed

+47
-8
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Other enhancements
3030
^^^^^^^^^^^^^^^^^^
3131
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
3232
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
33+
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via the ``pat`` parameter (:issue:`51748`)
3334
-
3435

3536
.. ---------------------------------------------------------------------------

pandas/core/strings/accessor.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,8 +1426,8 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None):
14261426
@forbid_nonstring_types(["bytes"])
14271427
def replace(
14281428
self,
1429-
pat: str | re.Pattern,
1430-
repl: str | Callable,
1429+
pat: str | re.Pattern | dict,
1430+
repl: str | Callable | None = None,
14311431
n: int = -1,
14321432
case: bool | None = None,
14331433
flags: int = 0,
@@ -1441,11 +1441,14 @@ def replace(
14411441
14421442
Parameters
14431443
----------
1444-
pat : str or compiled regex
1444+
pat : str, compiled regex, or a dict
14451445
String can be a character sequence or regular expression.
1446+
A dictionary maps each string to be replaced (key) to its
1447+
replacement string (value).
14461448
repl : str or callable
14471449
Replacement string or a callable. The callable is passed the regex
14481450
match object and must return a replacement string to be used.
1451+
Must be None if `pat` is a dict.
14491452
See :func:`re.sub`.
14501453
n : int, default -1 (all)
14511454
Number of replacements to make from start.
@@ -1479,6 +1482,7 @@ def replace(
14791482
* if `regex` is False and `repl` is a callable or `pat` is a compiled
14801483
regex
14811484
* if `pat` is a compiled regex and `case` or `flags` is set
1485+
* if `pat` is a dictionary and `repl` is not None.
14821486
14831487
Notes
14841488
-----
@@ -1488,6 +1492,15 @@ def replace(
14881492
14891493
Examples
14901494
--------
1495+
When `pat` is a dictionary, every key in `pat` is replaced
1496+
with its corresponding value:
1497+
1498+
>>> pd.Series(["A", "B", np.nan]).str.replace(pat={"A": "a", "B": "b"})
1499+
0 a
1500+
1 b
1501+
2 NaN
1502+
dtype: object
1503+
14911504
When `pat` is a string and `regex` is True, the given `pat`
14921505
is compiled as a regex. When `repl` is a string, it replaces matching
14931506
regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are
@@ -1550,8 +1563,11 @@ def replace(
15501563
2 NaN
15511564
dtype: object
15521565
"""
1566+
if isinstance(pat, dict) and repl is not None:
1567+
raise ValueError("repl cannot be used when pat is a dictionary")
1568+
15531569
# Check whether repl is valid (GH 13438, GH 15055)
1554-
if not (isinstance(repl, str) or callable(repl)):
1570+
if not isinstance(pat, dict) and not (isinstance(repl, str) or callable(repl)):
15551571
raise TypeError("repl must be a string or callable")
15561572

15571573
is_compiled_re = is_re(pat)
@@ -1571,10 +1587,17 @@ def replace(
15711587
if case is None:
15721588
case = True
15731589

1574-
result = self._data.array._str_replace(
1575-
pat, repl, n=n, case=case, flags=flags, regex=regex
1576-
)
1577-
return self._wrap_result(result)
1590+
res_output = self._data
1591+
if not isinstance(pat, dict):
1592+
pat = {pat: repl}
1593+
1594+
for key, value in pat.items():
1595+
result = res_output.array._str_replace(
1596+
key, value, n=n, case=case, flags=flags, regex=regex
1597+
)
1598+
res_output = self._wrap_result(result)
1599+
1600+
return res_output
15781601

15791602
@forbid_nonstring_types(["bytes"])
15801603
def repeat(self, repeats):

pandas/tests/strings/test_find_replace.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,21 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na):
355355
# --------------------------------------------------------------------------------------
356356
# str.replace
357357
# --------------------------------------------------------------------------------------
358+
def test_replace_dict_invalid(any_string_dtype):
    """Passing ``repl`` together with a dict ``pat`` must raise ValueError."""
    # GH 51914
    # Build the Series with the parametrized dtype so that the fixture is
    # actually exercised instead of re-running the default dtype each time.
    series = Series(
        data=["A", "B_junk", "C_gunk"], name="my_messy_col", dtype=any_string_dtype
    )
    msg = "repl cannot be used when pat is a dictionary"

    with pytest.raises(ValueError, match=msg):
        series.str.replace(pat={"A": "a", "B": "b"}, repl="A")
365+
366+
367+
def test_replace_dict(any_string_dtype):
    """A dict ``pat`` replaces every key with its value; other values are kept."""
    # GH 51914
    # Use the parametrized dtype for both input and expected output so the
    # dict path is checked against each string dtype the fixture provides.
    series = Series(data=["A", "B", "C"], name="my_messy_col", dtype=any_string_dtype)
    new_series = series.str.replace(pat={"A": "a", "B": "b"})
    expected = Series(
        data=["a", "b", "C"], name="my_messy_col", dtype=any_string_dtype
    )
    tm.assert_series_equal(new_series, expected)
358373

359374

360375
def test_replace(any_string_dtype):

0 commit comments

Comments
 (0)