From c9845820ed77d1ef3437291efac17665ee47471f Mon Sep 17 00:00:00 2001 From: Allan Larangeiras Date: Wed, 16 Jan 2019 16:34:11 -0200 Subject: [PATCH 1/9] string replace pattern size fix --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index ca79dcd9408d8..c79afa32ed223 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -564,7 +564,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): # add case flag, if provided if case is False: flags |= re.IGNORECASE - if is_compiled_re or len(pat) > 1 or flags or callable(repl): + if is_compiled_re or len(pat) > 0 or flags or callable(repl): n = n if n >= 0 else 0 compiled = re.compile(pat, flags=flags) f = lambda x: compiled.sub(repl=repl, string=x, count=n) From 67b0870a7a0a9c82ee41ea6d69571155864deb95 Mon Sep 17 00:00:00 2001 From: Allan Larangeira Date: Wed, 16 Jan 2019 19:39:32 -0200 Subject: [PATCH 2/9] adding test case to replace pattern problem --- pandas/tests/test_strings.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7cea3be03d1a7..6a4283bed7402 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1008,6 +1008,13 @@ def test_replace(self): values = klass(data) pytest.raises(TypeError, values.str.replace, 'a', repl) + def test_replace_single_pattern(self): + values = Series(['abc','123']) + + result = values.str.replace('.', 'foo') + exp = Series(['foofoofoo', 'foofoofoo']) + tm.assert_series_equal(result, exp) + def test_replace_callable(self): # GH 15055 values = Series(['fooBAD__barBAD', NA]) From 340ae8963f84622cbf6e04d88849c8a2239a00e6 Mon Sep 17 00:00:00 2001 From: Allan Larangeira Date: Wed, 16 Jan 2019 21:02:59 -0200 Subject: [PATCH 3/9] - adding comments and refactoring - fixing test_pipe_failures (it's not a regex test, it's a char test) --- pandas/tests/test_strings.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 6a4283bed7402..a068d5ae6301c 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1008,12 +1008,13 @@ def test_replace(self): values = klass(data) pytest.raises(TypeError, values.str.replace, 'a', repl) + # GH 24804 def test_replace_single_pattern(self): - values = Series(['abc','123']) + values = Series(['abc', '123']) result = values.str.replace('.', 'foo') - exp = Series(['foofoofoo', 'foofoofoo']) - tm.assert_series_equal(result, exp) + chars_replaced_expected = Series(['foofoofoo', 'foofoofoo']) + tm.assert_series_equal(result, chars_replaced_expected) def test_replace_callable(self): # GH 15055 @@ -2931,7 +2932,7 @@ def test_pipe_failures(self): tm.assert_series_equal(result, exp) - result = s.str.replace('|', ' ') + result = s.str.replace('|', ' ', regex=False) exp = Series(['A B C']) tm.assert_series_equal(result, exp) From 5a4e1319c3ad04703fd8faf4195db2f9ba2e3d24 Mon Sep 17 00:00:00 2001 From: Allan Larangeira Date: Thu, 17 Jan 2019 05:07:51 -0200 Subject: [PATCH 4/9] removing len check from replace pattern --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c79afa32ed223..7a9bbe7dcaf89 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -564,7 +564,7 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): # add case flag, if provided if case is False: flags |= re.IGNORECASE - if is_compiled_re or len(pat) > 0 or flags or callable(repl): + if is_compiled_re or pat or flags or callable(repl): n = n if n >= 0 else 0 compiled = re.compile(pat, flags=flags) f = lambda x: compiled.sub(repl=repl, string=x, count=n) From cf8dc7905c14242038749d1227b5ae43d5884884 Mon Sep 17 00:00:00 2001 From: Allan Larangeira Date: Thu, 17 Jan 2019 05:52:08 -0200 Subject: [PATCH 5/9] - refactoring test case variable name - adding whatsnew entry and a note for API breaking change --- doc/source/whatsnew/v0.24.0.rst | 4 ++++ pandas/tests/test_strings.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3685a24d60e74..a51f22cc3c515 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -795,6 +795,9 @@ Now, the return type is consistently a :class:`DataFrame`. and a :class:`DataFrame` with sparse values. The memory usage will be the same as in the previous version of pandas. + Be sure to perform a replace of literal strings by passing the + regex=False parameter to func:`str.replace`. + .. _whatsnew_0240.api_breaking.frame_to_dict_index_orient: Raise ValueError in ``DataFrame.to_dict(orient='index')`` @@ -1645,6 +1648,7 @@ Strings - Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`). - Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`) - Bug in :meth:`Index.str.cat` when the result contained only ``NaN`` (:issue:`24044`) +- Bug in :func:`Series.str.replace` not applying regex in patterns of len size = 1 (:issue:`24809`) Interval ^^^^^^^^ diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index a068d5ae6301c..d222fcb05be9f 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1013,8 +1013,8 @@ def test_replace_single_pattern(self): values = Series(['abc', '123']) result = values.str.replace('.', 'foo') - chars_replaced_expected = Series(['foofoofoo', 'foofoofoo']) - tm.assert_series_equal(result, chars_replaced_expected) + expected = Series(['foofoofoo', 'foofoofoo']) + tm.assert_series_equal(result, expected) def test_replace_callable(self): # GH 15055 From 924ecc87cadd43bd074669ca3aeef492d41d11f1 Mon Sep 17 00:00:00 2001 From: Allan Larangeira Date: Thu, 17 Jan 2019 07:23:06 -0200 Subject: [PATCH 6/9] removing whitespace in the end of the line --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index a51f22cc3c515..92a399734a2e2 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -795,7 +795,7 @@ Now, the return type is consistently a :class:`DataFrame`. and a :class:`DataFrame` with sparse values. The memory usage will be the same as in the previous version of pandas. - Be sure to perform a replace of literal strings by passing the + Be sure to perform a replace of literal strings by passing the regex=False parameter to func:`str.replace`. .. _whatsnew_0240.api_breaking.frame_to_dict_index_orient: From 97bf73a283cee04d70564924e3e6d417ace86c4d Mon Sep 17 00:00:00 2001 From: Allan Larangeiras Date: Thu, 17 Jan 2019 09:45:40 -0200 Subject: [PATCH 7/9] - changing the position of the API breaking note - adding before and after example --- doc/source/whatsnew/v0.24.0.rst | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 92a399734a2e2..c25f7c02ce9a3 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -432,6 +432,30 @@ Backwards incompatible API changes Pandas 0.24.0 includes a number of API breaking changes. +Replacing strings using Pattern +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Be sure to perform a replace of literal strings by passing the +regex=False parameter to func:`str.replace`. Mainly when the +pattern is 1 size string (:issue:`24809`) + +Before: + +.. ipython:: python + + s = pd.Series(['A|B|C']) + + result = s.str.replace('|', ' ') + result + +After: + +.. ipython:: python + + s = pd.Series(['A|B|C']) + + result = s.str.replace('|', ' ', regex=False) + result .. _whatsnew_0240.api_breaking.deps: @@ -795,9 +819,6 @@ Now, the return type is consistently a :class:`DataFrame`. and a :class:`DataFrame` with sparse values. The memory usage will be the same as in the previous version of pandas. - Be sure to perform a replace of literal strings by passing the - regex=False parameter to func:`str.replace`. - .. _whatsnew_0240.api_breaking.frame_to_dict_index_orient: Raise ValueError in ``DataFrame.to_dict(orient='index')`` From da16172c37230cce8a520081a25932452db10e72 Mon Sep 17 00:00:00 2001 From: Allan Larangeiras Date: Thu, 17 Jan 2019 10:31:00 -0200 Subject: [PATCH 8/9] removing whitespace from the documentation --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index c25f7c02ce9a3..d4ff329e954e7 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -436,7 +436,7 @@ Replacing strings using Pattern ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Be sure to perform a replace of literal strings by passing the -regex=False parameter to func:`str.replace`. Mainly when the +regex=False parameter to func:`str.replace`. Mainly when the pattern is 1 size string (:issue:`24809`) Before: From 93a0715e36a4e292fcf9389d581646d5ffab5ec1 Mon Sep 17 00:00:00 2001 From: Allan Larangeiras Date: Thu, 17 Jan 2019 15:34:23 -0200 Subject: [PATCH 9/9] - making the changes requested by the project members --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/strings.py | 7 +++++-- pandas/tests/test_strings.py | 9 ++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d4ff329e954e7..463142715e311 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1669,7 +1669,7 @@ Strings - Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`). - Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`) - Bug in :meth:`Index.str.cat` when the result contained only ``NaN`` (:issue:`24044`) -- Bug in :func:`Series.str.replace` not applying regex in patterns of len size = 1 (:issue:`24809`) +- Bug in :func:`Series.str.replace` not applying regex in patterns of length 1 (:issue:`24809`) Interval ^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 7a9bbe7dcaf89..f2586602e5498 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -425,7 +425,7 @@ def str_endswith(arr, pat, na=np.nan): return _na_map(f, arr, na, dtype=bool) -def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): +def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=None): r""" Replace occurrences of pattern/regex in the Series/Index with some other string. Equivalent to :meth:`str.replace` or @@ -577,6 +577,9 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): if callable(repl): raise ValueError("Cannot use a callable replacement when " "regex=False") + if regex==None: + warnings.warn("Warning: Interpreting '%s' as a literal, not a regex... " % pat + + "The default will change in the future.", FutureWarning, stacklevel=3) f = lambda x: x.replace(pat, repl, n) return _na_map(f, arr) @@ -2529,7 +2532,7 @@ def match(self, pat, case=True, flags=0, na=np.nan): return self._wrap_result(result, fill_value=na) @copy(str_replace) - def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): + def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None): result = str_replace(self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d222fcb05be9f..18acf46eb4c71 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1012,10 +1012,17 @@ def test_replace(self): def test_replace_single_pattern(self): values = Series(['abc', '123']) - result = values.str.replace('.', 'foo') + result = values.str.replace('.', 'foo', regex=True) expected = Series(['foofoofoo', 'foofoofoo']) tm.assert_series_equal(result, expected) + def test_replace_without_specifying_regex_parameter(self): + values = Series(['a.c']) + + result = values.str.replace('.', 'b') + expected = Series(['abc']) + tm.assert_series_equal(result, expected) + def test_replace_callable(self): # GH 15055 values = Series(['fooBAD__barBAD', NA])