Copy-on-write handling for regex replace.

Summary of what the hunks below change:

* pandas/core/internals/blocks.py -- `_replace_regex` gains a keyword
  `using_cow` (default False).  With `using_cow` set:
    - if the block cannot hold `to_replace`, it returns
      `[self.copy(deep=False)]` instead of `self` / a deep copy;
    - values are modified in place only when `inplace` is true AND
      `self.refs.has_reference()` is false, in which case `self.refs` is
      passed on to `make_block`; otherwise the values are copied and
      `refs=None` is used.
  With `using_cow` unset, the previous `inplace`-only logic is kept.
  `using_cow` is also forwarded to `block.convert`.
* pandas/core/internals/managers.py -- `replace_regex` passes
  `using_cow=using_copy_on_write()` to `apply`.
* pandas/tests/copy_view/test_replace.py -- adds
  `test_replace_regex_inplace_refs`, `test_replace_regex_inplace` and
  `test_replace_regex_inplace_no_op`.

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed into spaces.  Line breaks were restored so that every hunk matches
the old/new line counts in its `@@` header; leading indentation and blank
context lines are reconstructed from Python syntax -- confirm against the
upstream commit before applying with zero fuzz.

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 2cfab63f8a63f..96b3355872dc6 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -651,6 +651,7 @@ def _replace_regex(
         value,
         inplace: bool = False,
         mask=None,
+        using_cow: bool = False,
     ) -> list[Block]:
         """
         Replace elements by the given value.
@@ -665,6 +666,8 @@ def _replace_regex(
             Perform inplace modification.
         mask : array-like of bool, optional
             True indicate corresponding element is ignored.
+        using_cow: bool, default False
+            Specifying if copy on write is enabled.
 
         Returns
         -------
@@ -673,15 +676,27 @@ def _replace_regex(
         if not self._can_hold_element(to_replace):
             # i.e. only ObjectBlock, but could in principle include a
             # String ExtensionBlock
+            if using_cow:
+                return [self.copy(deep=False)]
             return [self] if inplace else [self.copy()]
 
         rx = re.compile(to_replace)
 
-        new_values = self.values if inplace else self.values.copy()
+        if using_cow:
+            if inplace and not self.refs.has_reference():
+                refs = self.refs
+                new_values = self.values
+            else:
+                refs = None
+                new_values = self.values.copy()
+        else:
+            refs = None
+            new_values = self.values if inplace else self.values.copy()
+
         replace_regex(new_values, rx, value, mask)
 
-        block = self.make_block(new_values)
-        return block.convert(copy=False)
+        block = self.make_block(new_values, refs=refs)
+        return block.convert(copy=False, using_cow=using_cow)
 
     @final
     def replace_list(
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 1a18bb6519f77..0d6478aeb47bf 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -467,7 +467,7 @@ def replace(self: T, to_replace, value, inplace: bool) -> T:
         )
 
     def replace_regex(self, **kwargs):
-        return self.apply("_replace_regex", **kwargs)
+        return self.apply("_replace_regex", **kwargs, using_cow=using_copy_on_write())
 
     def replace_list(
         self: T,
diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py
index c29e19a6e832f..c0b5498b839f5 100644
--- a/pandas/tests/copy_view/test_replace.py
+++ b/pandas/tests/copy_view/test_replace.py
@@ -47,6 +47,51 @@ def test_replace(using_copy_on_write, replace_kwargs):
     tm.assert_frame_equal(df, df_orig)
 
 
+def test_replace_regex_inplace_refs(using_copy_on_write):
+    df = DataFrame({"a": ["aaa", "bbb"]})
+    df_orig = df.copy()
+    view = df[:]
+    arr = get_array(df, "a")
+    df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
+    if using_copy_on_write:
+        assert not np.shares_memory(arr, get_array(df, "a"))
+        assert df._mgr._has_no_reference(0)
+        tm.assert_frame_equal(view, df_orig)
+    else:
+        assert np.shares_memory(arr, get_array(df, "a"))
+
+
+def test_replace_regex_inplace(using_copy_on_write):
+    df = DataFrame({"a": ["aaa", "bbb"]})
+    arr = get_array(df, "a")
+    df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
+    if using_copy_on_write:
+        assert df._mgr._has_no_reference(0)
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+    df_orig = df.copy()
+    df2 = df.replace(to_replace=r"^b.*$", value="new", regex=True)
+    tm.assert_frame_equal(df_orig, df)
+    assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+
+def test_replace_regex_inplace_no_op(using_copy_on_write):
+    df = DataFrame({"a": [1, 2]})
+    arr = get_array(df, "a")
+    df.replace(to_replace=r"^a.$", value="new", inplace=True, regex=True)
+    if using_copy_on_write:
+        assert df._mgr._has_no_reference(0)
+    assert np.shares_memory(arr, get_array(df, "a"))
+
+    df_orig = df.copy()
+    df2 = df.replace(to_replace=r"^x.$", value="new", regex=True)
+    tm.assert_frame_equal(df_orig, df)
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+
+
 def test_replace_mask_all_false_second_block(using_copy_on_write):
     df = DataFrame({"a": [1.5, 2, 3], "b": 100.5, "c": 1, "d": 2})
     df_orig = df.copy()