diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3685a24d60e74..463142715e311 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -432,6 +432,30 @@ Backwards incompatible API changes Pandas 0.24.0 includes a number of API breaking changes. +Replacing strings using Pattern +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To replace a literal string, pass ``regex=False`` to +:func:`Series.str.replace`. This matters most when the pattern +is a single character (:issue:`24809`). + +Before: + +.. ipython:: python + + s = pd.Series(['A|B|C']) + + result = s.str.replace('|', ' ') + result + +After: + +.. ipython:: python + + s = pd.Series(['A|B|C']) + + result = s.str.replace('|', ' ', regex=False) + result .. _whatsnew_0240.api_breaking.deps: @@ -1645,6 +1669,7 @@ Strings - Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`). - Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`) - Bug in :meth:`Index.str.cat` when the result contained only ``NaN`` (:issue:`24044`) +- Bug in :func:`Series.str.replace` not applying regex in patterns of length 1 (:issue:`24809`) Interval ^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index ca79dcd9408d8..f2586602e5498 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -425,7 +425,7 @@ def str_endswith(arr, pat, na=np.nan): return _na_map(f, arr, na, dtype=bool) -def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): +def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=None): r""" Replace occurrences of pattern/regex in the Series/Index with some other string. 
Equivalent to :meth:`str.replace` or @@ -564,7 +564,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): # add case flag, if provided if case is False: flags |= re.IGNORECASE - if is_compiled_re or len(pat) > 1 or flags or callable(repl): + if is_compiled_re or pat or flags or callable(repl): n = n if n >= 0 else 0 compiled = re.compile(pat, flags=flags) f = lambda x: compiled.sub(repl=repl, string=x, count=n) @@ -577,6 +577,9 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): if callable(repl): raise ValueError("Cannot use a callable replacement when " "regex=False") + if regex==None: + warnings.warn("Warning: Interpreting '%s' as a literal, not a regex... " % pat + + "The default will change in the future.", FutureWarning, stacklevel=3) f = lambda x: x.replace(pat, repl, n) return _na_map(f, arr) @@ -2529,7 +2532,7 @@ def match(self, pat, case=True, flags=0, na=np.nan): return self._wrap_result(result, fill_value=na) @copy(str_replace) - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): + def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None): result = str_replace(self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7cea3be03d1a7..18acf46eb4c71 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1008,6 +1008,21 @@ def test_replace(self): values = klass(data) pytest.raises(TypeError, values.str.replace, 'a', repl) + # GH 24804 + def test_replace_single_pattern(self): + values = Series(['abc', '123']) + + result = values.str.replace('.', 'foo', regex=True) + expected = Series(['foofoofoo', 'foofoofoo']) + tm.assert_series_equal(result, expected) + + def test_replace_without_specifying_regex_parameter(self): + values = Series(['a.c']) + + result = values.str.replace('.', 'b') + expected = Series(['abc']) + tm.assert_series_equal(result, 
expected) + def test_replace_callable(self): # GH 15055 values = Series(['fooBAD__barBAD', NA]) @@ -2924,7 +2939,7 @@ def test_pipe_failures(self): tm.assert_series_equal(result, exp) - result = s.str.replace('|', ' ') + result = s.str.replace('|', ' ', regex=False) exp = Series(['A B C']) tm.assert_series_equal(result, exp)