From 7d5e4f825adf3126412c2202edab6cbb6aee10d6 Mon Sep 17 00:00:00 2001 From: prakhar987 Date: Sat, 8 Feb 2020 15:24:34 +0530 Subject: [PATCH 1/4] BUG: Series.str.repeat can handle pd.NA for vectored inputs (#31632) --- doc/source/whatsnew/v1.0.2.rst | 4 ++++ pandas/core/strings.py | 2 ++ pandas/tests/test_strings.py | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 70aaaa6d0a60d..ac0a3ae42f6db 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -29,6 +29,10 @@ Bug fixes - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +**Strings** + +- Using ``pd.NA`` with :meth:`Series.str.repeat` now correctly outputs a null value instead of raising an error for vector inputs (:issue:`31632`) + .. --------------------------------------------------------------------------- .. _whatsnew_102.contributors: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 18c7504f2c2f8..3dc964f6c3949 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -778,6 +778,8 @@ def scalar_rep(x): else: def rep(x, r): + if isinstance(x, libmissing.NAType): + return x try: return bytes.__mul__(x, r) except TypeError: diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 62d26dacde67b..0bc10ac3e7823 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1153,6 +1153,12 @@ def test_repeat(self): assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) + # GH: 31632 + values = Series(["a", None]) + result = values.str.repeat([3, 4]) + exp = Series(["aaa", None]) + tm.assert_series_equal(result, exp) + def test_match(self): # New match behavior introduced in 0.13 values = Series(["fooBAD__barBAD", np.nan, "foo"]) From 5c59289f1eb5d4fd9c8408a8c39ecaf7a0eebcb6 Mon Sep 17 00:00:00 2001 From: prakhar pandey Date: Mon, 10 Feb 2020 09:30:19 +0530 
Subject: [PATCH 2/4] Moved test to separate function --- pandas/tests/test_strings.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 0bc10ac3e7823..56861e3ec5e54 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1153,12 +1153,18 @@ def test_repeat(self): assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) + def test_repeat_with_null(self): # GH: 31632 values = Series(["a", None]) result = values.str.repeat([3, 4]) exp = Series(["aaa", None]) tm.assert_series_equal(result, exp) + values = Series(["a", "b"]) + result = values.str.repeat([3, None]) + exp = Series(["aaa", None]) + tm.assert_series_equal(result, exp) + def test_match(self): # New match behavior introduced in 0.13 values = Series(["fooBAD__barBAD", np.nan, "foo"]) From 11d9670f0ca55804027c443b45e26e4e8b47ab06 Mon Sep 17 00:00:00 2001 From: prakhar pandey Date: Tue, 11 Feb 2020 09:54:21 +0530 Subject: [PATCH 3/4] Used is instead of isinstance and added dtype to series in tests --- pandas/core/strings.py | 2 +- pandas/tests/test_strings.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 3dc964f6c3949..9ef066d55689f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -778,7 +778,7 @@ def scalar_rep(x): else: def rep(x, r): - if isinstance(x, libmissing.NAType): + if x is libmissing.NA: return x try: return bytes.__mul__(x, r) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 56861e3ec5e54..ef217d2cd7aa4 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1155,14 +1155,14 @@ def test_repeat(self): def test_repeat_with_null(self): # GH: 31632 - values = Series(["a", None]) + values = Series(["a", None], dtype='string') result = values.str.repeat([3, 4]) - exp = Series(["aaa", None]) + exp = Series(["aaa", None], dtype='string') 
tm.assert_series_equal(result, exp) - values = Series(["a", "b"]) + values = Series(["a", "b"], dtype='string') result = values.str.repeat([3, None]) - exp = Series(["aaa", None]) + exp = Series(["aaa", None], dtype='string') tm.assert_series_equal(result, exp) def test_match(self): From 3e50a2c0197ba38268c9fc95937742490eddc379 Mon Sep 17 00:00:00 2001 From: prakhar pandey Date: Tue, 11 Feb 2020 09:55:31 +0530 Subject: [PATCH 4/4] Style fix --- pandas/tests/test_strings.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index ef217d2cd7aa4..faa7b78b07301 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1155,14 +1155,14 @@ def test_repeat(self): def test_repeat_with_null(self): # GH: 31632 - values = Series(["a", None], dtype='string') + values = Series(["a", None], dtype="string") result = values.str.repeat([3, 4]) - exp = Series(["aaa", None], dtype='string') + exp = Series(["aaa", None], dtype="string") tm.assert_series_equal(result, exp) - values = Series(["a", "b"], dtype='string') + values = Series(["a", "b"], dtype="string") result = values.str.repeat([3, None]) - exp = Series(["aaa", None], dtype='string') + exp = Series(["aaa", None], dtype="string") tm.assert_series_equal(result, exp) def test_match(self):