From c079870a1ccbf9717d32ef2d582264828cd83bc9 Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 11:25:01 +0900 Subject: [PATCH 1/9] removed as_indexer(deprecated of 0.21.0) completely from str.match() --- pandas/core/strings.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 07e744a6284ef..d8df2302f5dd1 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -709,7 +709,7 @@ def rep(x, r): return result -def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None): +def str_match(arr, pat, case=True, flags=0, na=np.nan): """ Determine if each string matches a regular expression. @@ -722,8 +722,6 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None): flags : int, default 0 (no flags) re module flags, e.g. re.IGNORECASE na : default NaN, fill value for missing values. - as_indexer - .. deprecated:: 0.21.0 Returns ------- @@ -741,17 +739,6 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None): regex = re.compile(pat, flags=flags) - if (as_indexer is False) and (regex.groups > 0): - raise ValueError("as_indexer=False with a pattern with groups is no " - "longer supported. Use '.str.extract(pat)' instead") - elif as_indexer is not None: - # Previously, this keyword was used for changing the default but - # deprecated behaviour. This keyword is now no longer needed. 
- warnings.warn("'as_indexer' keyword was specified but is ignored " - "(match now returns a boolean indexer by default), " - "and will be removed in a future version.", - FutureWarning, stacklevel=3) - dtype = bool f = lambda x: bool(regex.match(x)) From f6612aac96f081f6fa2bd3bb09d717923aeee157 Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 11:38:38 +0900 Subject: [PATCH 2/9] changed tests to suit removal of as_indexer from str.match() --- pandas/tests/test_strings.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 9d008dfd25c90..0a67f19aea2cf 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -938,20 +938,24 @@ def test_match(self): exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - # test passing as_indexer still works but is ignored + # GH 22316 test the removal of as_indexer from match values = Series(['fooBAD__barBAD', NA, 'foo']) exp = Series([True, NA, False]) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_raises_regex(TypeError, + "str_match() got an " + "unexpected keyword " + "argument 'as_indexer'"): result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) - tm.assert_series_equal(result, exp) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_raises_regex(TypeError, + "str_match() got an " + "unexpected keyword " + "argument 'as_indexer'"): result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) - tm.assert_series_equal(result, exp) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_raises_regex(TypeError, + "str_match() got an " + "unexpected keyword " + "argument 'as_indexer'"): result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) - tm.assert_series_equal(result, exp) - pytest.raises(ValueError, values.str.match, '.*(BAD[_]+).*(BAD)', - as_indexer=False) # mixed mixed = 
Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), From 72f74b2da4b1b6a48967616775212a9ee6d1d228 Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 11:49:38 +0900 Subject: [PATCH 3/9] remove tests for the same points --- pandas/tests/test_strings.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 0a67f19aea2cf..653e621eba886 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -946,16 +946,6 @@ def test_match(self): "unexpected keyword " "argument 'as_indexer'"): result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) - with tm.assert_raises_regex(TypeError, - "str_match() got an " - "unexpected keyword " - "argument 'as_indexer'"): - result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) - with tm.assert_raises_regex(TypeError, - "str_match() got an " - "unexpected keyword " - "argument 'as_indexer'"): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) # mixed mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), From ce529e9ea3c5692f08cad5e448fb6f2660b29e1a Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 14:02:59 +0900 Subject: [PATCH 4/9] Added whatsnew and updated copy decorator(match) to not use as_indexer as well --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/strings.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index cf12759c051fc..b701efad9f8ba 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -630,7 +630,7 @@ Numeric Strings ^^^^^^^ -- +- Removed as_indexer(deprecated of 0.21.0) keyword completely from str.match() (:issue:`22356`,:issue:`6581`) - - diff --git a/pandas/core/strings.py b/pandas/core/strings.py index d8df2302f5dd1..f80260d8d2779 100644 --- a/pandas/core/strings.py +++ 
b/pandas/core/strings.py @@ -2456,9 +2456,8 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): return self._wrap_result(result) @copy(str_match) - def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=None): - result = str_match(self._parent, pat, case=case, flags=flags, na=na, - as_indexer=as_indexer) + def match(self, pat, case=True, flags=0, na=np.nan): + result = str_match(self._parent, pat, case=case, flags=flags, na=na) return self._wrap_result(result) @copy(str_replace) From 454612eb692716f510503bc2ba3cab5bc640fc96 Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 14:23:25 +0900 Subject: [PATCH 5/9] fixed assertion on the new test --- pandas/tests/test_strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 653e621eba886..7b42e87204adf 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -942,7 +942,7 @@ def test_match(self): values = Series(['fooBAD__barBAD', NA, 'foo']) exp = Series([True, NA, False]) with tm.assert_raises_regex(TypeError, - "str_match() got an " + "match() got an " "unexpected keyword " "argument 'as_indexer'"): result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) From db7f34edb13c5d4d0410f255d2488f50f0264ce0 Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 14:45:43 +0900 Subject: [PATCH 6/9] changed regex of the test to the output from tests --- pandas/tests/test_strings.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7b42e87204adf..dd33ad2353bab 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -942,9 +942,8 @@ def test_match(self): values = Series(['fooBAD__barBAD', NA, 'foo']) exp = Series([True, NA, False]) with tm.assert_raises_regex(TypeError, - "match() got an " - "unexpected keyword " - 
"argument 'as_indexer'"): + "match() got an unexpected " + "keyword argument 'as_indexer'"): result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) # mixed From 07cff36f67944e2a6479ba0ef1625384f3367dc7 Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 15:05:30 +0900 Subject: [PATCH 7/9] assertion regex to exclude the func name --- pandas/tests/test_strings.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index dd33ad2353bab..12dbd17d1b89d 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -942,8 +942,9 @@ def test_match(self): values = Series(['fooBAD__barBAD', NA, 'foo']) exp = Series([True, NA, False]) with tm.assert_raises_regex(TypeError, - "match() got an unexpected " - "keyword argument 'as_indexer'"): + "got an unexpected " + "keyword argument " + "'as_indexer'"): result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) # mixed From 23604e940c72954106d8e148c64fa0aee67a7a2b Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 15:08:13 +0900 Subject: [PATCH 8/9] applied suggested changes to whatsnew --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b701efad9f8ba..321561b4df6b4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -518,7 +518,7 @@ Removal of prior version deprecations/changes - The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) - Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`) - Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`) -- +- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`) .. 
_whatsnew_0240.performance: @@ -630,7 +630,7 @@ Numeric Strings ^^^^^^^ -- Removed as_indexer(deprecated of 0.21.0) keyword completely from str.match() (:issue:`22356`,:issue:`6581`) +- - - From b109d199d6ebecd00d916949e738ca9845afaad5 Mon Sep 17 00:00:00 2001 From: HyunTruth <1234hjlee@naver.com> Date: Wed, 15 Aug 2018 16:03:36 +0900 Subject: [PATCH 9/9] deleted the corresponding tests in `test_match` for `as_indexer` --- pandas/tests/test_strings.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 12dbd17d1b89d..446df5d2e7d11 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -938,15 +938,6 @@ def test_match(self): exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - # GH 22316 test the removal of as_indexer from match - values = Series(['fooBAD__barBAD', NA, 'foo']) - exp = Series([True, NA, False]) - with tm.assert_raises_regex(TypeError, - "got an unexpected " - "keyword argument " - "'as_indexer'"): - result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) - # mixed mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), 'foo', None, 1, 2.])