Skip to content

CLN: Misc pre-COW stuff #57406

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
get_obj,
)
from pandas._testing.contexts import (
assert_cow_warning,
decompress_file,
ensure_clean,
raises_chained_assignment_error,
Expand Down Expand Up @@ -583,7 +582,6 @@ def shares_memory(left, right) -> bool:
"assert_series_equal",
"assert_sp_array_equal",
"assert_timedelta_array_equal",
"assert_cow_warning",
"at",
"BOOL_DTYPES",
"box_expected",
Expand Down
26 changes: 0 additions & 26 deletions pandas/_testing/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,29 +228,3 @@ def raises_chained_assignment_error(warn=True, extra_warnings=(), extra_match=()
warning,
match="|".join((match, *extra_match)),
)


def assert_cow_warning(warn=True, match=None, **kwargs):
    """
    Assert that a warning is raised in the CoW warning mode.

    Parameters
    ----------
    warn : bool, default True
        By default, check that a warning is raised. Can be turned off by
        passing False.
    match : str, optional
        The warning message to match against, if different from the default
        ("Setting a value on a view"). Any falsy value selects the default.
    **kwargs
        Passed through to assert_produces_warning.

    Returns
    -------
    context manager
        ``contextlib.nullcontext()`` when ``warn`` is falsy; otherwise
        whatever ``assert_produces_warning`` returns for ``FutureWarning``
        with the selected ``match``.
    """
    if not warn:
        # Nothing to check: hand back a no-op context manager so callers can
        # wrap their code in the same ``with`` statement either way.
        from contextlib import nullcontext

        return nullcontext()

    # Imported inside the function, as in the original (presumably to avoid a
    # circular import with pandas._testing -- confirm). Deferred until after
    # the ``warn`` check so the no-op path does not need it.
    from pandas._testing import assert_produces_warning

    return assert_produces_warning(
        FutureWarning, match=match or "Setting a value on a view", **kwargs
    )
6 changes: 2 additions & 4 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,8 +1050,7 @@ def _pad_or_backfill(
copy: bool = True,
) -> Self:
if not self._hasna:
# TODO(CoW): Not necessary anymore when CoW is the default
return self.copy()
return self

if limit is None and limit_area is None:
method = missing.clean_fill_method(method)
Expand Down Expand Up @@ -1084,8 +1083,7 @@ def fillna(
value, method = validate_fillna_kwargs(value, method)

if not self._hasna:
# TODO(CoW): Not necessary anymore when CoW is the default
return self.copy()
return self

if limit is not None:
return super().fillna(value=value, method=method, limit=limit, copy=copy)
Expand Down
23 changes: 0 additions & 23 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,29 +131,6 @@
_dtype_obj = np.dtype("object")


# General warning text for mutating a Series/DataFrame that shares data with
# another object; every line of the message ends with a newline.
COW_WARNING_GENERAL_MSG = (
    "Setting a value on a view: behaviour will change in pandas 3.0.\n"
    "You are mutating a Series or DataFrame object, "
    "and currently this mutation will\n"
    "also have effect on other Series or DataFrame objects "
    "that share data with this\n"
    "object. In pandas 3.0 (with Copy-on-Write), "
    "updating one Series or DataFrame object\n"
    "will never modify another.\n"
)


# Warning text for setting a value on an object that shares data with another
# object; it embeds a worked `ser = df["col"]` example of the current behaviour
# and states how pandas 3.0 (with Copy-on-Write) will differ.
# NOTE(review): the example lines inside the literal appear flush-left here --
# confirm whether the emitted message is meant to indent them.
COW_WARNING_SETITEM_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
Currently, the mutation will also have effect on the object that shares data
with this object. For example, when setting a value in a Series that was
extracted from a column of a DataFrame, that DataFrame will also be updated:

ser = df["col"]
ser[0] = 0 <--- in pandas 2, this also updates `df`

In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
modify another, and thus in the example above, `df` will not be changed.
"""


def maybe_split(meth: F) -> F:
"""
If we have a multi-column block, split and operate block-wise. Otherwise
Expand Down
30 changes: 0 additions & 30 deletions pandas/errors/cow.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,3 @@
"using 'df.method({col: value}, inplace=True)' instead, to perform "
"the operation inplace on the original object.\n\n"
)


# Message text about setting values through chained assignment, e.g.
# `df["col"][row_indexer] = value`. It recommends a single-step
# `df.loc[row_indexer, "col"] = values` assignment and links to the indexing
# user guide. The text itself starts with "ChainedAssignmentError:" even
# though the constant is named as a warning message.
_chained_assignment_warning_msg = (
"ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
"You are setting values through chained assignment. Currently this works "
"in certain cases, but when using Copy-on-Write (which will become the "
"default behaviour in pandas 3.0) this will never work to update the "
"original DataFrame or Series, because the intermediate object on which "
"we are setting values will behave as a copy.\n"
"A typical example is when you are setting values in a column of a "
"DataFrame, like:\n\n"
'df["col"][row_indexer] = value\n\n'
'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
"assignment in a single step and ensure this keeps updating the original `df`.\n\n"
"See the caveats in the documentation: "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
"indexing.html#returning-a-view-versus-a-copy\n"
)

# Message text about chained assignment through an inplace method, e.g.
# `df[col].method(value, inplace=True)`. It suggests either
# `df.method({col: value}, inplace=True)` or reassigning
# `df[col] = df[col].method(value)` instead.
_chained_assignment_warning_method_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment using an inplace method.\n"
"The behavior will change in pandas 3.0. This inplace method will "
"never work because the intermediate object on which we are setting "
"values always behaves as a copy.\n\n"
"For example, when doing 'df[col].method(value, inplace=True)', try "
"using 'df.method({col: value}, inplace=True)' or "
"df[col] = df[col].method(value) instead, to perform "
"the operation inplace on the original object.\n\n"
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pandas._testing as tm


# TODO(CoW-warn) expand the cases
@pytest.mark.parametrize(
"indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/copy_view/test_core_functionalities.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_setitem_with_view_invalidated_does_not_copy(request):
df["b"] = 100
arr = get_array(df, "a")
view = None # noqa: F841
# TODO(CoW-warn) false positive? -> block gets split because of `df["b"] = 100`
# TODO(CoW) block gets split because of `df["b"] = 100`
# which introduces additional refs, even when those of `view` go out of scope
df.iloc[0, 0] = 100
# Setitem split the block. Since the old block shared data with view
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def test_fillna_copy_frame(self, data_missing):
super().test_fillna_copy_frame(data_missing)

@pytest.mark.xfail(reason="Fails with CoW")
def test_equals_same_data_different_object(self, data, request):
def test_equals_same_data_different_object(self, data):
super().test_equals_same_data_different_object(data)

@pytest.mark.xfail(reason="failing on np.array(self, dtype=str)")
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ def test_setitem(self, float_frame, using_infer_string):
# so raise/warn
smaller = float_frame[:2]

# With CoW, adding a new column doesn't raise a warning
smaller["col10"] = ["1", "2"]

if using_infer_string:
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ def test_multiindex_assignment_single_dtype(self):
)

# arr can be losslessly cast to int, so this setitem is inplace
# INFO(CoW-warn) this does not warn because we directly took .values
# above, so no reference to a pandas object is alive for `view`
df.loc[4, "c"] = arr
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
result = df.loc[4, "c"]
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/indexing/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,6 @@ def test_detect_chained_assignment_warning_stacklevel(self, rhs):
df = DataFrame(np.arange(25).reshape(5, 5))
df_original = df.copy()
chained = df.loc[:3]
# INFO(CoW) no warning, and original dataframe not changed
chained[2] = rhs
tm.assert_frame_equal(df, df_original)

Expand Down
3 changes: 0 additions & 3 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@
"_global_config",
"_chained_assignment_msg",
"_chained_assignment_method_msg",
"_chained_assignment_warning_msg",
"_chained_assignment_warning_method_msg",
"_check_cacher",
"_version_meson",
# The numba extensions need this to mock the iloc object
"_iLocIndexer",
Expand Down