Skip to content

Commit 6312cbb

Browse files
phoflim-vinicius
authored and
im-vinicius
committed
CoW: Add warning for chained assignment with fillna (pandas-dev#53779)
1 parent e5f0a1a commit 6312cbb

File tree

8 files changed

+72
-7
lines changed

8 files changed

+72
-7
lines changed

doc/source/whatsnew/v2.1.0.rst

+8
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ Copy-on-Write improvements
2727
of those Index objects for the columns of the DataFrame (:issue:`52947`)
2828
- Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`)
2929

30+
- Trying to operate inplace on a temporary column selection
31+
(for example, ``df["a"].fillna(100, inplace=True)``)
32+
will now always raise a warning when Copy-on-Write is enabled. In this mode,
33+
operating inplace like this will never work, since the selection behaves
34+
as a temporary copy. This holds true for:
35+
36+
- DataFrame.fillna / Series.fillna
37+
3038
.. _whatsnew_210.enhancements.enhancement2:
3139

3240
``map(func, na_action="ignore")`` now works for all array types

pandas/core/generic.py

+17
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import operator
1010
import pickle
1111
import re
12+
import sys
1213
from typing import (
1314
TYPE_CHECKING,
1415
Any,
@@ -89,13 +90,19 @@
8990
WriteExcelBuffer,
9091
npt,
9192
)
93+
from pandas.compat import (
94+
PY311,
95+
PYPY,
96+
)
9297
from pandas.compat._optional import import_optional_dependency
9398
from pandas.compat.numpy import function as nv
9499
from pandas.errors import (
95100
AbstractMethodError,
101+
ChainedAssignmentError,
96102
InvalidIndexError,
97103
SettingWithCopyError,
98104
SettingWithCopyWarning,
105+
_chained_assignment_method_msg,
99106
)
100107
from pandas.util._decorators import doc
101108
from pandas.util._exceptions import find_stack_level
@@ -7083,6 +7090,16 @@ def fillna(
70837090
Note that column D is not affected since it is not present in df2.
70847091
"""
70857092
inplace = validate_bool_kwarg(inplace, "inplace")
7093+
if inplace:
7094+
if not PYPY and using_copy_on_write():
7095+
refcount = 2 if PY311 else 3
7096+
if sys.getrefcount(self) <= refcount:
7097+
warnings.warn(
7098+
_chained_assignment_method_msg,
7099+
ChainedAssignmentError,
7100+
stacklevel=2,
7101+
)
7102+
70867103
value, method = validate_fillna_kwargs(value, method)
70877104
if method is not None:
70887105
warnings.warn(

pandas/errors/__init__.py

+11
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,17 @@ class ChainedAssignmentError(Warning):
393393
)
394394

395395

396+
_chained_assignment_method_msg = (
397+
"A value is trying to be set on a copy of a DataFrame or Series "
398+
"through chained assignment.\n"
399+
"When using the Copy-on-Write mode, such chained assignment never works "
400+
"to update the original DataFrame or Series, because the intermediate "
401+
"object on which we are setting values always behaves as a copy.\n\n"
402+
"Try using 'df.method({col: value}, inplace=True)' instead, to perform "
403+
"the operation inplace.\n\n"
404+
)
405+
406+
396407
class NumExprClobberingError(NameError):
397408
"""
398409
Exception raised when trying to use a built-in numexpr name as a variable name.

pandas/tests/copy_view/test_interp_fillna.py

+13
Original file line numberDiff line numberDiff line change
@@ -344,3 +344,16 @@ def test_fillna_inplace_ea_noop_shares_memory(
344344
assert not np.shares_memory(get_array(df, "b"), get_array(view, "b"))
345345
df.iloc[0, 1] = 100
346346
tm.assert_frame_equal(df_orig, view)
347+
348+
349+
def test_fillna_chained_assignment(using_copy_on_write):
350+
df = DataFrame({"a": [1, np.nan, 2], "b": 1})
351+
df_orig = df.copy()
352+
if using_copy_on_write:
353+
with tm.raises_chained_assignment_error():
354+
df["a"].fillna(100, inplace=True)
355+
tm.assert_frame_equal(df, df_orig)
356+
357+
with tm.raises_chained_assignment_error():
358+
df[["a"]].fillna(100, inplace=True)
359+
tm.assert_frame_equal(df, df_orig)

pandas/tests/frame/methods/test_fillna.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import ChainedAssignmentError
45
import pandas.util._test_decorators as td
56

67
from pandas import (
@@ -49,11 +50,12 @@ def test_fillna_on_column_view(self, using_copy_on_write):
4950
arr = np.full((40, 50), np.nan)
5051
df = DataFrame(arr, copy=False)
5152

52-
# TODO(CoW): This should raise a chained assignment error
53-
df[0].fillna(-1, inplace=True)
5453
if using_copy_on_write:
54+
with tm.assert_produces_warning(ChainedAssignmentError):
55+
df[0].fillna(-1, inplace=True)
5556
assert np.isnan(arr[:, 0]).all()
5657
else:
58+
df[0].fillna(-1, inplace=True)
5759
assert (arr[:, 0] == -1).all()
5860

5961
# i.e. we didn't create a new 49-column block
@@ -105,7 +107,9 @@ def test_fillna_mixed_float(self, mixed_float_frame):
105107
result = mf.fillna(method="pad")
106108
_check_mixed_float(result, dtype={"C": None})
107109

108-
def test_fillna_empty(self):
110+
def test_fillna_empty(self, using_copy_on_write):
111+
if using_copy_on_write:
112+
pytest.skip("condition is unnecessary complex and is deprecated anyway")
109113
# empty frame (GH#2778)
110114
df = DataFrame(columns=["x"])
111115
for m in ["pad", "backfill"]:

pandas/tests/frame/test_block_internals.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
import numpy as np
88
import pytest
99

10-
from pandas.errors import PerformanceWarning
10+
from pandas.errors import (
11+
ChainedAssignmentError,
12+
PerformanceWarning,
13+
)
1114
import pandas.util._test_decorators as td
1215

1316
import pandas as pd
@@ -410,7 +413,11 @@ def test_update_inplace_sets_valid_block_values(using_copy_on_write):
410413
df = DataFrame({"a": Series([1, 2, None], dtype="category")})
411414

412415
# inplace update of a single column
413-
df["a"].fillna(1, inplace=True)
416+
if using_copy_on_write:
417+
with tm.assert_produces_warning(ChainedAssignmentError):
418+
df["a"].fillna(1, inplace=True)
419+
else:
420+
df["a"].fillna(1, inplace=True)
414421

415422
# check we haven't put a Series into any block.values
416423
assert isinstance(df._mgr.blocks[0].values, Categorical)

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.errors import SettingWithCopyError
4+
from pandas.errors import (
5+
ChainedAssignmentError,
6+
SettingWithCopyError,
7+
)
58
import pandas.util._test_decorators as td
69

710
from pandas import (
@@ -30,7 +33,8 @@ def test_detect_chained_assignment(using_copy_on_write):
3033
zed = DataFrame(events, index=["a", "b"], columns=multiind)
3134

3235
if using_copy_on_write:
33-
zed["eyes"]["right"].fillna(value=555, inplace=True)
36+
with tm.assert_produces_warning(ChainedAssignmentError):
37+
zed["eyes"]["right"].fillna(value=555, inplace=True)
3438
else:
3539
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
3640
with pytest.raises(SettingWithCopyError, match=msg):

scripts/validate_unwanted_patterns.py

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"_arrow_dtype_mapping",
5252
"_global_config",
5353
"_chained_assignment_msg",
54+
"_chained_assignment_method_msg",
5455
}
5556

5657

0 commit comments

Comments
 (0)