From e9a418ddef1021d2f9032bdf019bf41f2d174a32 Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Wed, 9 Sep 2020 12:22:45 -0400 Subject: [PATCH 1/8] add breaking test --- pandas/tests/test_strings.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d9396d70f9112..0ea4b695bcae6 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -838,8 +838,12 @@ def test_contains_for_object_category(self): expected = Series([True, False, False, True, False]) tm.assert_series_equal(result, expected) - def test_startswith(self): - values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) + # add category dtype parametrizations for GH-36241 + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_startswith(self, dtype): + values = Series( + ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"], dtype=dtype + ) result = values.str.startswith("foo") exp = Series([False, np.nan, True, False, False, np.nan, True]) @@ -867,8 +871,12 @@ def test_startswith(self): ) tm.assert_series_equal(rs, xp) - def test_endswith(self): - values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) + # add category dtype parametrizations for GH-36241 + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_endswith(self, dtype): + values = Series( + ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"], dtype=dtype + ) result = values.str.endswith("foo") exp = Series([False, np.nan, False, False, True, np.nan, True]) From 1a5cf4f30b2481f90dd9e2e7e6050b6d35cc9fed Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Wed, 9 Sep 2020 12:23:26 -0400 Subject: [PATCH 2/8] propagate na to _wrap_result --- pandas/core/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6702bf519c52e..4decd86764ccc 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2050,7 +2050,7 @@ def wrapper2(self, pat, flags=0, **kwargs): @forbid_nonstring_types(forbidden_types, name=name) def wrapper3(self, pat, na=np.nan): result = f(self._parent, pat, na=na) - return self._wrap_result(result, returns_string=returns_string) + return self._wrap_result(result, returns_string=returns_string, fill_value=na) wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 From 2f90e6b3f0729d4a5cda6362b7e641e4e19a0b16 Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Fri, 11 Sep 2020 15:28:07 -0400 Subject: [PATCH 3/8] add string, fill value and null value parametrize --- pandas/tests/test_strings.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 0ea4b695bcae6..7ca3f6c9e58ec 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -838,19 +838,21 @@ def test_contains_for_object_category(self): expected = Series([True, False, False, True, False]) tm.assert_series_equal(result, expected) - # add category dtype parametrizations for GH-36241 - @pytest.mark.parametrize("dtype", [None, "category"]) - def test_startswith(self, dtype): + @pytest.mark.parametrize("dtype", [None, "category", "string"]) + @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) + @pytest.mark.parametrize("na", [True, False]) + def test_startswith(self, dtype, null_value, na): + # add category dtype parametrizations for GH-36241 values = Series( - ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"], dtype=dtype + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype ) result = values.str.startswith("foo") exp = Series([False, np.nan, True, False, False, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.startswith("foo", na=True) - tm.assert_series_equal(result, exp.fillna(True).astype(bool)) + result = values.str.startswith("foo", na=na) + tm.assert_series_equal(result, exp.fillna(na).astype(bool)) # mixed mixed = np.array( @@ -871,19 +873,21 @@ def test_startswith(self, dtype): ) tm.assert_series_equal(rs, xp) - # add category dtype parametrizations for GH-36241 - @pytest.mark.parametrize("dtype", [None, "category"]) - def test_endswith(self, dtype): + @pytest.mark.parametrize("dtype", [None, "category", "string"]) + @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) + @pytest.mark.parametrize("na", [True, False]) + def test_endswith(self, dtype, null_value, na): + # add category dtype parametrizations for GH-36241 values = Series( - ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"], dtype=dtype + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype ) result = values.str.endswith("foo") exp = Series([False, np.nan, False, False, True, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.endswith("foo", na=False) - tm.assert_series_equal(result, exp.fillna(False).astype(bool)) + result = values.str.endswith("foo", na=na) + tm.assert_series_equal(result, exp.fillna(na).astype(bool)) # mixed mixed = np.array( @@ -3536,6 +3540,7 @@ def test_casefold(self): def test_string_array(any_string_method): + print(any_string_method) method_name, args, kwargs = any_string_method if method_name == "decode": pytest.skip("decode requires bytes.") From 54e486e5d21e8b0a386cbb9be4f35e96d6ce95c5 Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Fri, 11 Sep 2020 16:10:29 -0400 Subject: [PATCH 4/8] add na=True/False param to string tests; parametrize over na and null values --- pandas/tests/test_strings.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7ca3f6c9e58ec..f6b02d64271b0 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -29,6 +29,8 @@ def assert_series_or_index_equal(left, right): ("decode", ("UTF-8",), {}), ("encode", ("UTF-8",), {}), ("endswith", ("a",), {}), + ("endswith", ("a",), {"na": True}), + ("endswith", ("a",), {"na": False}), ("extract", ("([a-z]*)",), {"expand": False}), ("extract", ("([a-z]*)",), {"expand": True}), ("extractall", ("([a-z]*)",), {}), @@ -58,6 +60,8 @@ def assert_series_or_index_equal(left, right): ("split", (" ",), {"expand": False}), ("split", (" ",), {"expand": True}), ("startswith", ("a",), {}), + ("startswith", ("a",), {"na": True}), + ("startswith", ("a",), {"na": False}), # translating unicode points of "a" to "d" ("translate", ({97: 100},), {}), ("wrap", (2,), {}), @@ -838,7 +842,7 @@ def test_contains_for_object_category(self): expected = Series([True, False, False, True, False]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [None, "category", "string"]) + @pytest.mark.parametrize("dtype", [None, "category"]) @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) def test_startswith(self, dtype, null_value, na): @@ -873,7 +877,7 @@ def test_startswith(self, dtype, null_value, na): ) tm.assert_series_equal(rs, xp) - @pytest.mark.parametrize("dtype", [None, "category", "string"]) + @pytest.mark.parametrize("dtype", [None, "category"]) @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) @pytest.mark.parametrize("na", [True, False]) def test_endswith(self, dtype, null_value, na): @@ -3540,7 +3544,6 @@ def test_casefold(self): def test_string_array(any_string_method): - print(any_string_method) method_name, args, kwargs = any_string_method if method_name == "decode": pytest.skip("decode requires bytes.") @@ -3565,6 +3568,10 @@ def test_string_array(any_string_method): assert result.dtype == "boolean" result = result.astype(object) + elif expected.dtype == "bool": + assert result.dtype == "boolean" + result = result.astype("bool") + elif expected.dtype == "float" and expected.isna().any(): assert result.dtype == "Int64" result = result.astype("float") From 44d386e3977c0b17b042f164b499e455b789ce37 Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Fri, 11 Sep 2020 16:13:30 -0400 Subject: [PATCH 5/8] add whatsnew note --- doc/source/whatsnew/v1.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index e3161012da5d1..bf873d3ff3157 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -22,7 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :meth:`str.startswith` and :meth:`str.endswith` for Series with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) .. --------------------------------------------------------------------------- From 5e3be5a2a6ab999ff51136dc52808621d5242b12 Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Fri, 11 Sep 2020 16:14:18 -0400 Subject: [PATCH 6/8] black --- pandas/tests/test_strings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f6b02d64271b0..cd946d18f1cdf 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -848,7 +848,8 @@ def test_contains_for_object_category(self): def test_startswith(self, dtype, null_value, na): # add category dtype parametrizations for GH-36241 values = Series( - ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], + dtype=dtype, ) result = values.str.startswith("foo") @@ -883,7 +884,8 @@ def test_startswith(self, dtype, null_value, na): def test_endswith(self, dtype, null_value, na): # add category dtype parametrizations for GH-36241 values = Series( - ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], dtype=dtype + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], + dtype=dtype, ) result = values.str.endswith("foo") From d8e5bb7ce0b9d594cb655da6fea964b2dee3ad91 Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Fri, 11 Sep 2020 16:36:13 -0400 Subject: [PATCH 7/8] Update doc/source/whatsnew/v1.1.3.rst Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index bf873d3ff3157..c06990e3f2051 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -22,7 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- Bug in :meth:`str.startswith` and :meth:`str.endswith` for Series with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) +- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) .. --------------------------------------------------------------------------- From 1467a0a7980fb38aec6d1905869d0dcd0f1364cb Mon Sep 17 00:00:00 2001 From: Asish Mahapatra Date: Fri, 11 Sep 2020 20:07:25 -0400 Subject: [PATCH 8/8] make expected series explicit --- pandas/tests/test_strings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index cd946d18f1cdf..c792a48d3ef08 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -857,7 +857,8 @@ def test_startswith(self, dtype, null_value, na): tm.assert_series_equal(result, exp) result = values.str.startswith("foo", na=na) - tm.assert_series_equal(result, exp.fillna(na).astype(bool)) + exp = Series([False, na, True, False, False, na, True]) + tm.assert_series_equal(result, exp) # mixed mixed = np.array( @@ -893,7 +894,8 @@ def test_endswith(self, dtype, null_value, na): tm.assert_series_equal(result, exp) result = values.str.endswith("foo", na=na) - tm.assert_series_equal(result, exp.fillna(na).astype(bool)) + exp = Series([False, na, False, False, True, na, True]) + tm.assert_series_equal(result, exp) # mixed mixed = np.array(