Skip to content

CoW: Add warning for fillna #56288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 5, 2023
1 change: 1 addition & 0 deletions pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self:
inplace=inplace,
downcast=downcast,
using_cow=using_copy_on_write(),
already_warned=_AlreadyWarned(),
)

@final
Expand Down
21 changes: 20 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1591,6 +1591,7 @@ def fillna(
inplace: bool = False,
downcast=None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
"""
fillna on the block with the value. If we fail, then convert to
Expand Down Expand Up @@ -1626,7 +1627,9 @@ def fillna(
mask[mask.cumsum(self.ndim - 1) > limit] = False

if inplace:
nbs = self.putmask(mask.T, value, using_cow=using_cow)
nbs = self.putmask(
mask.T, value, using_cow=using_cow, already_warned=already_warned
)
else:
# without _downcast, we would break
# test_fillna_dtype_conversion_equiv_replace
Expand Down Expand Up @@ -2208,6 +2211,7 @@ def fillna(
inplace: bool = False,
downcast=None,
using_cow: bool = False,
already_warned=None,
) -> list[Block]:
if isinstance(self.dtype, IntervalDtype):
# Block.fillna handles coercion (test_fillna_interval)
Expand All @@ -2217,6 +2221,7 @@ def fillna(
inplace=inplace,
downcast=downcast,
using_cow=using_cow,
already_warned=already_warned,
)
if using_cow and self._can_hold_na and not self.values._hasna:
refs = self.refs
Expand Down Expand Up @@ -2244,6 +2249,20 @@ def fillna(
DeprecationWarning,
stacklevel=find_stack_level(),
)
else:
if (
not copy
and warn_copy_on_write()
and already_warned is not None
and not already_warned.warned_already
):
Comment on lines +2253 to +2258
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this only be done in case of inplace=True? Or is that already embodied in the not copy?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy is only False if inplace is True

if self.refs.has_reference():
warnings.warn(
COW_WARNING_SETITEM_MSG,
FutureWarning,
stacklevel=find_stack_level(),
)
already_warned.warned_already = True

nb = self.make_block_same_class(new_values, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")
Expand Down
14 changes: 8 additions & 6 deletions pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,14 +229,15 @@ def test_fillna_inplace(using_copy_on_write, downcast):
assert df._mgr._has_no_reference(1)


def test_fillna_inplace_reference(using_copy_on_write):
def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write):
df = DataFrame({"a": [1.5, np.nan], "b": 1})
df_orig = df.copy()
arr_a = get_array(df, "a")
arr_b = get_array(df, "b")
view = df[:]

df.fillna(5.5, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.fillna(5.5, inplace=True)
if using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), arr_a)
assert np.shares_memory(get_array(df, "b"), arr_b)
Expand All @@ -250,7 +251,7 @@ def test_fillna_inplace_reference(using_copy_on_write):
tm.assert_frame_equal(df, expected)


def test_fillna_interval_inplace_reference(using_copy_on_write):
def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write):
# Set dtype explicitly to avoid implicit cast when setting nan
ser = Series(
interval_range(start=0, end=5), name="a", dtype="interval[float64, right]"
Expand All @@ -259,7 +260,8 @@ def test_fillna_interval_inplace_reference(using_copy_on_write):

ser_orig = ser.copy()
view = ser[:]
ser.fillna(value=Interval(left=0, right=5), inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
ser.fillna(value=Interval(left=0, right=5), inplace=True)

if using_copy_on_write:
assert not np.shares_memory(
Expand Down Expand Up @@ -330,8 +332,8 @@ def test_fillna_inplace_ea_noop_shares_memory(
df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
df_orig = df.copy()
view = df[:]
# TODO(CoW-warn)
df.fillna(100, inplace=True)
with tm.assert_cow_warning(warn_copy_on_write):
df.fillna(100, inplace=True)

if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,8 +745,7 @@ def test_inplace_dict_update_view(
df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]})
df_orig = df.copy()
result_view = df[:]
# TODO(CoW-warn) better warning message + should warn in all cases
with tm.assert_cow_warning(warn_copy_on_write and not isinstance(val, int)):
with tm.assert_cow_warning(warn_copy_on_write):
df.fillna(val, inplace=True)
expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]})
tm.assert_frame_equal(df, expected)
Expand Down