Skip to content

Commit 050a1a2

Browse files
authored
DEPR: Change str.replace(regex) from True to False & single behavior (#49486)
* DEPR: Change str.replace(regex) from True to False & single behavior * Add versionchanged
1 parent 133c6dd commit 050a1a2

File tree

4 files changed

+26
-70
lines changed

4 files changed

+26
-70
lines changed

doc/source/user_guide/text.rst

+8-6
Original file line numberDiff line numberDiff line change
@@ -267,14 +267,16 @@ i.e., from the end of the string to the beginning of the string:
267267
s3
268268
s3.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
269269
270-
.. warning::
271270
272-
Some caution must be taken when dealing with regular expressions! The current behavior
273-
is to treat single character patterns as literal strings, even when ``regex`` is set
274-
to ``True``. This behavior is deprecated and will be removed in a future version so
275-
that the ``regex`` keyword is always respected.
271+
.. versionchanged:: 2.0
272+
273+
A single-character pattern with ``regex=True`` is also treated as a regular expression:
274+
275+
.. ipython:: python
276276
277-
.. versionchanged:: 1.2.0
277+
s4 = pd.Series(["a.b", ".", "b", np.nan, ""], dtype="string")
278+
s4
279+
s4.str.replace(".", "a", regex=True)
278280
279281
If you want literal replacement of a string (equivalent to :meth:`str.replace`), you
280282
can set the optional ``regex`` parameter to ``False``, rather than escaping each

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ Removal of prior version deprecations/changes
322322
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
323323
- Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`)
324324
- Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`)
325+
- Changed the default value of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single-character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal (:issue:`36695`, :issue:`24804`)
325326
- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`)
326327
- Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`)
327328
- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)

pandas/core/strings/accessor.py

+2-26
Original file line numberDiff line numberDiff line change
@@ -1323,7 +1323,7 @@ def replace(
13231323
n: int = -1,
13241324
case: bool | None = None,
13251325
flags: int = 0,
1326-
regex: bool | None = None,
1326+
regex: bool = False,
13271327
):
13281328
r"""
13291329
Replace each occurrence of pattern/regex in the Series/Index.
@@ -1351,16 +1351,14 @@ def replace(
13511351
flags : int, default 0 (no flags)
13521352
Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled
13531353
regex.
1354-
regex : bool, default True
1354+
regex : bool, default False
13551355
Determines if the passed-in pattern is a regular expression:
13561356
13571357
- If True, assumes the passed-in pattern is a regular expression.
13581358
- If False, treats the pattern as a literal string.
13591359
- Cannot be set to False if `pat` is a compiled regex or `repl` is
13601360
a callable.
13611361
1362-
.. versionadded:: 0.23.0
1363-
13641362
Returns
13651363
-------
13661364
Series or Index of object
@@ -1444,20 +1442,6 @@ def replace(
14441442
2 NaN
14451443
dtype: object
14461444
"""
1447-
if regex is None:
1448-
if isinstance(pat, str) and any(c in pat for c in ".+*|^$?[](){}\\"):
1449-
# warn only in cases where regex behavior would differ from literal
1450-
msg = (
1451-
"The default value of regex will change from True to False "
1452-
"in a future version."
1453-
)
1454-
if len(pat) == 1:
1455-
msg += (
1456-
" In addition, single character regular expressions will "
1457-
"*not* be treated as literal strings when regex=True."
1458-
)
1459-
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
1460-
14611445
# Check whether repl is valid (GH 13438, GH 15055)
14621446
if not (isinstance(repl, str) or callable(repl)):
14631447
raise TypeError("repl must be a string or callable")
@@ -1476,14 +1460,6 @@ def replace(
14761460
elif callable(repl):
14771461
raise ValueError("Cannot use a callable replacement when regex=False")
14781462

1479-
# The current behavior is to treat single character patterns as literal strings,
1480-
# even when ``regex`` is set to ``True``.
1481-
if isinstance(pat, str) and len(pat) == 1:
1482-
regex = False
1483-
1484-
if regex is None:
1485-
regex = True
1486-
14871463
if case is None:
14881464
case = True
14891465

pandas/tests/strings/test_find_replace.py

+15-38
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ def test_replace_callable_raises(any_string_dtype, repl):
423423
with tm.maybe_produces_warning(
424424
PerformanceWarning, any_string_dtype == "string[pyarrow]"
425425
):
426-
values.str.replace("a", repl)
426+
values.str.replace("a", repl, regex=True)
427427

428428

429429
def test_replace_callable_named_groups(any_string_dtype):
@@ -477,7 +477,7 @@ def test_replace_compiled_regex_unicode(any_string_dtype):
477477
with tm.maybe_produces_warning(
478478
PerformanceWarning, any_string_dtype == "string[pyarrow]"
479479
):
480-
result = ser.str.replace(pat, ", ")
480+
result = ser.str.replace(pat, ", ", regex=True)
481481
tm.assert_series_equal(result, expected)
482482

483483

@@ -490,13 +490,13 @@ def test_replace_compiled_regex_raises(any_string_dtype):
490490
msg = "case and flags cannot be set when pat is a compiled regex"
491491

492492
with pytest.raises(ValueError, match=msg):
493-
ser.str.replace(pat, "", flags=re.IGNORECASE)
493+
ser.str.replace(pat, "", flags=re.IGNORECASE, regex=True)
494494

495495
with pytest.raises(ValueError, match=msg):
496-
ser.str.replace(pat, "", case=False)
496+
ser.str.replace(pat, "", case=False, regex=True)
497497

498498
with pytest.raises(ValueError, match=msg):
499-
ser.str.replace(pat, "", case=True)
499+
ser.str.replace(pat, "", case=True, regex=True)
500500

501501

502502
def test_replace_compiled_regex_callable(any_string_dtype):
@@ -507,7 +507,7 @@ def test_replace_compiled_regex_callable(any_string_dtype):
507507
with tm.maybe_produces_warning(
508508
PerformanceWarning, any_string_dtype == "string[pyarrow]"
509509
):
510-
result = ser.str.replace(pat, repl, n=2)
510+
result = ser.str.replace(pat, repl, n=2, regex=True)
511511
expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
512512
tm.assert_series_equal(result, expected)
513513

@@ -617,48 +617,25 @@ def test_replace_not_case_sensitive_not_regex(any_string_dtype):
617617
tm.assert_series_equal(result, expected)
618618

619619

620-
def test_replace_regex_default_warning(any_string_dtype):
620+
def test_replace_regex(any_string_dtype):
621621
# https://github.com/pandas-dev/pandas/pull/24809
622622
s = Series(["a", "b", "ac", np.nan, ""], dtype=any_string_dtype)
623-
msg = (
624-
"The default value of regex will change from True to False in a "
625-
"future version\\.$"
626-
)
627-
628-
with tm.assert_produces_warning(
629-
FutureWarning,
630-
match=msg,
631-
raise_on_extra_warnings=any_string_dtype != "string[pyarrow]",
632-
):
633-
result = s.str.replace("^.$", "a")
623+
result = s.str.replace("^.$", "a", regex=True)
634624
expected = Series(["a", "a", "ac", np.nan, ""], dtype=any_string_dtype)
635625
tm.assert_series_equal(result, expected)
636626

637627

638-
@pytest.mark.parametrize("regex", [True, False, None])
628+
@pytest.mark.parametrize("regex", [True, False])
639629
def test_replace_regex_single_character(regex, any_string_dtype):
640-
# https://github.com/pandas-dev/pandas/pull/24809
641-
642-
# The current behavior is to treat single character patterns as literal strings,
643-
# even when ``regex`` is set to ``True``.
644-
630+
# https://github.com/pandas-dev/pandas/pull/24809, enforced in 2.0
631+
# GH 24804
645632
s = Series(["a.b", ".", "b", np.nan, ""], dtype=any_string_dtype)
646633

647-
if regex is None:
648-
msg = re.escape(
649-
"The default value of regex will change from True to False in a future "
650-
"version. In addition, single character regular expressions will *not* "
651-
"be treated as literal strings when regex=True."
652-
)
653-
with tm.assert_produces_warning(
654-
FutureWarning,
655-
match=msg,
656-
):
657-
result = s.str.replace(".", "a", regex=regex)
634+
result = s.str.replace(".", "a", regex=regex)
635+
if regex:
636+
expected = Series(["aaa", "a", "a", np.nan, ""], dtype=any_string_dtype)
658637
else:
659-
result = s.str.replace(".", "a", regex=regex)
660-
661-
expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype)
638+
expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype)
662639
tm.assert_series_equal(result, expected)
663640

664641

0 commit comments

Comments
 (0)