Skip to content

Commit 9ca620f

Browse files
committed
ENH: Allow literal (non-regex) replacement using .str.replace pandas-dev#16808
1 parent f4c9d96 commit 9ca620f

File tree

2 files changed

+65
-30
lines changed

2 files changed

+65
-30
lines changed

pandas/core/strings.py

+51-30
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ def str_endswith(arr, pat, na=np.nan):
305305
return _na_map(f, arr, na, dtype=bool)
306306

307307

308-
def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
308+
def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True):
309309
r"""
310310
Replace occurrences of pattern/regex in the Series/Index with
311311
some other string. Equivalent to :meth:`str.replace` or
@@ -336,6 +336,11 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
336336
flags : int, default 0 (no flags)
337337
- re module flags, e.g. re.IGNORECASE
338338
- Cannot be set if `pat` is a compiled regex
339+
regex : boolean, default True
340+
- If True, assumes the passed-in pattern is a regular expression.
341+
- If False, treats the pattern as a literal string.
342+
- Cannot be set to False if `pat` is a compiled regex or `repl` is
343+
a callable.
339344
340345
Returns
341346
-------
@@ -344,17 +349,27 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
344349
Notes
345350
-----
346351
When `pat` is a compiled regex, all flags should be included in the
347-
compiled regex. Use of `case` or `flags` with a compiled regex will
348-
raise an error.
352+
compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled
353+
regex will raise an error.
349354
350355
Examples
351356
--------
352-
When `repl` is a string, every `pat` is replaced as with
353-
:meth:`str.replace`. NaN value(s) in the Series are left as is.
357+
When `pat` is a string and `regex` is False, every `pat` is replaced with
358+
`repl` as with :meth:`str.replace`. NaN value(s) in the Series are left as
359+
is.
354360
355-
>>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', 'b')
356-
0 boo
357-
1 buz
361+
>>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
362+
0 bao
363+
1 fuz
364+
2 NaN
365+
dtype: object
366+
367+
When `pat` is a string and `regex` is True, the given `pat` is compiled
368+
as a regex. When `repl` is a string, it replaces matching regex patterns
369+
as with :func:`re.sub`:
370+
>>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
371+
0 bao
372+
1 baz
358373
2 NaN
359374
dtype: object
360375
@@ -403,27 +418,33 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0):
403418
raise TypeError("repl must be a string or callable")
404419

405420
is_compiled_re = is_re(pat)
406-
if is_compiled_re:
407-
if (case is not None) or (flags != 0):
408-
raise ValueError("case and flags cannot be set"
409-
" when pat is a compiled regex")
410-
else:
411-
# not a compiled regex
412-
# set default case
413-
if case is None:
414-
case = True
415-
416-
# add case flag, if provided
417-
if case is False:
418-
flags |= re.IGNORECASE
419-
420-
use_re = is_compiled_re or len(pat) > 1 or flags or callable(repl)
421-
422-
if use_re:
423-
n = n if n >= 0 else 0
424-
regex = re.compile(pat, flags=flags)
425-
f = lambda x: regex.sub(repl=repl, string=x, count=n)
421+
if regex:
422+
if is_compiled_re:
423+
if (case is not None) or (flags != 0):
424+
raise ValueError("case and flags cannot be set"
425+
" when pat is a compiled regex")
426+
else:
427+
# not a compiled regex
428+
# set default case
429+
if case is None:
430+
case = True
431+
432+
# add case flag, if provided
433+
if case is False:
434+
flags |= re.IGNORECASE
435+
if is_compiled_re or len(pat) > 1 or flags or callable(repl):
436+
n = n if n >= 0 else 0
437+
compiled = re.compile(pat, flags=flags)
438+
f = lambda x: compiled.sub(repl=repl, string=x, count=n)
439+
else:
440+
f = lambda x: x.replace(pat, repl, n)
426441
else:
442+
if is_compiled_re:
443+
raise ValueError("Cannot use a compiled regex as replacement "
444+
"pattern with regex=False")
445+
if callable(repl):
446+
raise ValueError("Cannot use a callable replacement when "
447+
"regex=False")
427448
f = lambda x: x.replace(pat, repl, n)
428449

429450
return _na_map(f, arr)
@@ -1595,9 +1616,9 @@ def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=None):
15951616
return self._wrap_result(result)
15961617

15971618
@copy(str_replace)
1598-
def replace(self, pat, repl, n=-1, case=None, flags=0):
1619+
def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
15991620
result = str_replace(self._data, pat, repl, n=n, case=case,
1600-
flags=flags)
1621+
flags=flags, regex=regex)
16011622
return self._wrap_result(result)
16021623

16031624
@copy(str_repeat)

pandas/tests/test_strings.py

+14
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,16 @@ def test_replace(self):
431431
values = klass(data)
432432
pytest.raises(TypeError, values.str.replace, 'a', repl)
433433

434+
# GH16808 literal replace (regex=False vs regex=True)
435+
values = Series(['f.o', 'foo', NA])
436+
exp = Series(['bao', 'bao', NA])
437+
result = values.str.replace('f.', 'ba')
438+
tm.assert_series_equal(result, exp)
439+
440+
exp = Series(['bao', 'foo', NA])
441+
result = values.str.replace('f.', 'ba', regex=False)
442+
tm.assert_series_equal(result, exp)
443+
434444
def test_replace_callable(self):
435445
# GH 15055
436446
values = Series(['fooBAD__barBAD', NA])
@@ -441,6 +451,8 @@ def test_replace_callable(self):
441451
exp = Series(['foObaD__baRbaD', NA])
442452
tm.assert_series_equal(result, exp)
443453

454+
pytest.raises(ValueError, values.str.replace, 'abc', repl, regex=False)
455+
444456
# test with wrong number of arguments, raising an error
445457
if compat.PY2:
446458
p_err = r'takes (no|(exactly|at (least|most)) ?\d+) arguments?'
@@ -522,6 +534,8 @@ def test_replace_compiled_regex(self):
522534
"case and flags cannot be"):
523535
result = values.str.replace(pat, '', case=True)
524536

537+
pytest.raises(ValueError, values.str.replace, pat, '', regex=False)
538+
525539
# test with callable
526540
values = Series(['fooBAD__barBAD', NA])
527541
repl = lambda m: m.group(0).swapcase()

0 commit comments

Comments
 (0)