Skip to content

Commit 3c5fb2e

Browse files
Backport PR #51659 on branch 2.0.x (ENH: Improve replace lazy copy for categoricals) (#51699)
Backport PR #51659: ENH: Improve replace lazy copy for categoricals

Co-authored-by: Patrick Hoefler <[email protected]>
1 parent c047d9c commit 3c5fb2e

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

pandas/core/internals/blocks.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -701,8 +701,7 @@ def replace_list(
701701
# TODO: avoid special-casing
702702
# GH49404
703703
if using_cow and inplace:
704-
# TODO(CoW): Optimize
705-
blk = self.copy()
704+
blk = self.copy(deep=self.refs.has_reference())
706705
else:
707706
blk = self if inplace else self.copy()
708707
values = cast(Categorical, blk.values)

pandas/tests/copy_view/test_replace.py

+32
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,38 @@ def test_replace_to_replace_wrong_dtype(using_copy_on_write):
112112
assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
113113

114114

def test_replace_list_categorical(using_copy_on_write):
    """Check codes memory sharing for list-replace on a categorical column.

    An in-place ``replace`` is expected to keep using the same codes buffer
    that was obtained before the call, while a non-in-place ``replace`` is
    expected to return a frame whose codes do not share memory with it and
    to leave the source frame unchanged.
    """
    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    values_before = get_array(df, "a")

    # In-place replace: the codes obtained before the call must still be
    # the ones backing the column afterwards.
    df.replace(["c"], value="a", inplace=True)
    codes_after_inplace = get_array(df, "a").codes
    assert np.shares_memory(values_before.codes, codes_after_inplace)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)

    # Non-in-place replace: the result must not share the codes buffer,
    # and the source frame must compare equal to its earlier snapshot.
    snapshot = df.copy()
    replaced = df.replace(["b"], value="a")
    codes_of_result = get_array(replaced, "a").codes
    assert not np.shares_memory(values_before.codes, codes_of_result)

    tm.assert_frame_equal(df, snapshot)
128+
129+
def test_replace_list_inplace_refs_categorical(using_copy_on_write):
    """Check in-place list-replace on a categorical column when ``df[:]`` exists.

    After the in-place ``replace`` the codes of ``df`` and of the earlier
    ``df[:]`` object must not share memory. Under copy-on-write the
    ``df[:]`` object must additionally still equal the pre-replace copy.
    """
    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    view = df[:]
    expected = df.copy()

    df.replace(["c"], value="a", inplace=True)

    view_codes = get_array(view, "a").codes
    frame_codes = get_array(df, "a").codes
    # The same no-sharing check applies with and without copy-on-write.
    # Without copy-on-write this could be inplace, but the test currently
    # asserts that the codes are not shared there either.
    assert not np.shares_memory(view_codes, frame_codes)

    if using_copy_on_write:
        tm.assert_frame_equal(expected, view)
145+
146+
115147
@pytest.mark.parametrize("to_replace", [1.5, [1.5], []])
116148
def test_replace_inplace(using_copy_on_write, to_replace):
117149
df = DataFrame({"a": [1.5, 2, 3]})

0 commit comments

Comments
 (0)