From 075fe672ea06035d62e799e57e1a60f0e4dcebeb Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Thu, 9 Jan 2025 22:20:29 -0800 Subject: [PATCH 1/2] BUG: DataFrame/Series regex replace fix for all NA values --- pandas/core/array_algos/replace.py | 3 ++- pandas/tests/frame/methods/test_replace.py | 7 +++++++ pandas/tests/series/methods/test_replace.py | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index a9ad66b7cb2e5..debd6368e98a4 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -89,7 +89,8 @@ def _check_comparison_types( op = np.vectorize( lambda x: bool(re.search(b, x)) if isinstance(x, str) and isinstance(b, (str, Pattern)) - else False + else False, + otypes=[bool], ) # GH#32621 use mask to avoid comparing to NAs diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index b2320798ea9a2..c95806d9316bf 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -713,6 +713,13 @@ def test_replace_with_None_keeps_categorical(self): ) tm.assert_frame_equal(result, expected) + def test_replace_all_NA(self): + # GH#60688 + df = DataFrame({"ticker": ["#1234#"], "name": [None]}) + result = df.replace({col: {r"^#": "$"} for col in df.columns}, regex=True) + expected = DataFrame({"ticker": ["$1234#"], "name": [None]}) + tm.assert_frame_equal(result, expected) + def test_replace_value_is_none(self, datetime_frame): orig_value = datetime_frame.iloc[0, 0] orig2 = datetime_frame.iloc[1, 0] diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index ecfe3d1b39d31..abd5d075ea3d5 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -708,3 +708,10 @@ def test_replace_ea_float_with_bool(self): expected = ser.copy() result = ser.replace(0.0, True) tm.assert_series_equal(result, expected) + + def test_replace_all_NA(self): + # GH#60688 + df = pd.Series([pd.NA, pd.NA]) + result = df.replace({r"^#": "$"}, regex=True) + expected = pd.Series([pd.NA, pd.NA]) + tm.assert_series_equal(result, expected) From 5c1210edc4dc9f59bea314036c2c7eecc399ea43 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Thu, 9 Jan 2025 22:23:22 -0800 Subject: [PATCH 2/2] Add entry to whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 94c289ef3ace7..5cc258a54fa48 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -798,6 +798,7 @@ Other - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`) - Bug in :meth:`Series.to_string` when series contains complex floats with exponents (:issue:`60405`) - Bug in :meth:`read_csv` where chained fsspec TAR file and ``compression="infer"`` fails with ``tarfile.ReadError`` (:issue:`60028`) - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)