From de485b977c0551754b80586870324d6c31a5aeec Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Sun, 2 Jul 2023 23:48:28 +0200
Subject: [PATCH 1/2] ENH: Don't fragment manager if convert is no-op

---
 pandas/core/internals/blocks.py            | 9 ++++++++-
 pandas/tests/frame/methods/test_replace.py | 7 +++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 1d572dbfd5386..5d857acfdc7f9 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -480,7 +480,14 @@ def convert(
             return [self.copy()] if copy else [self]
 
         if self.ndim != 1 and self.shape[0] != 1:
-            return self.split_and_operate(Block.convert, copy=copy, using_cow=using_cow)
+            blocks = self.split_and_operate(
+                Block.convert, copy=copy, using_cow=using_cow
+            )
+            if all(blk.dtype.kind == "O" for blk in blocks):
+                if using_cow:
+                    return [self.copy(deep=False)]
+                return [self.copy()] if copy else [self]
+            return blocks
 
         values = self.values
         if values.ndim == 2:
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 9256df72cdf7b..79e77761f9a8b 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1592,3 +1592,10 @@ def test_replace_categorical_no_replacement(self):
 
         result = df.replace(to_replace=[".", "def"], value=["_", None])
         tm.assert_frame_equal(result, expected)
+
+    def test_replace_object_splitting(self):
+        # GH#
+        df = DataFrame({"a": ["a"], "b": "b"})
+        assert len(df._mgr.blocks) == 1
+        df.replace(to_replace=r"^\s*$", value="", inplace=True, regex=True)
+        assert len(df._mgr.blocks) == 1

From c782c9a4986dc326a36edddfd04502d1f550ce70 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Mon, 3 Jul 2023 18:19:38 +0200
Subject: [PATCH 2/2] Update

---
 pandas/core/internals/blocks.py            | 1 +
 pandas/tests/frame/methods/test_replace.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 5d857acfdc7f9..8923faf444953 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -484,6 +484,7 @@ def convert(
                 Block.convert, copy=copy, using_cow=using_cow
             )
             if all(blk.dtype.kind == "O" for blk in blocks):
+                # Avoid fragmenting the block if convert is a no-op
                 if using_cow:
                     return [self.copy(deep=False)]
                 return [self.copy()] if copy else [self]
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 79e77761f9a8b..1846ac24e9cc5 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1594,7 +1594,7 @@ def test_replace_categorical_no_replacement(self):
         tm.assert_frame_equal(result, expected)
 
     def test_replace_object_splitting(self):
-        # GH#
+        # GH#53977
         df = DataFrame({"a": ["a"], "b": "b"})
         assert len(df._mgr.blocks) == 1
         df.replace(to_replace=r"^\s*$", value="", inplace=True, regex=True)