Skip to content

CoW: Remove Copy-on-Write fixtures from more tests #57367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1487,7 +1487,7 @@ def test_apply_dtype(col):
tm.assert_series_equal(result, expected)


def test_apply_mutating(using_copy_on_write):
def test_apply_mutating():
# GH#35462 case where applied func pins a new BlockManager to a row
df = DataFrame({"a": range(100), "b": range(100, 200)})
df_orig = df.copy()
Expand All @@ -1504,11 +1504,7 @@ def func(row):
result = df.apply(func, axis=1)

tm.assert_frame_equal(result, expected)
if using_copy_on_write:
# INFO(CoW) With copy on write, mutating a viewing row doesn't mutate the parent
tm.assert_frame_equal(df, df_orig)
else:
tm.assert_frame_equal(df, result)
tm.assert_frame_equal(df, df_orig)


def test_apply_empty_list_reduce():
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/computation/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1964,7 +1964,7 @@ def test_eval_no_support_column_name(request, column):
tm.assert_frame_equal(result, expected)


def test_set_inplace(using_copy_on_write):
def test_set_inplace():
# https://github.com/pandas-dev/pandas/issues/47449
# Ensure we don't only update the DataFrame inplace, but also the actual
# column values, such that references to this column also get updated
Expand All @@ -1974,13 +1974,9 @@ def test_set_inplace(using_copy_on_write):
df.eval("A = B + C", inplace=True)
expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]})
tm.assert_frame_equal(df, expected)
if not using_copy_on_write:
tm.assert_series_equal(ser, expected["A"])
tm.assert_series_equal(result_view["A"], expected["A"])
else:
expected = Series([1, 2, 3], name="A")
tm.assert_series_equal(ser, expected)
tm.assert_series_equal(result_view["A"], expected)
expected = Series([1, 2, 3], name="A")
tm.assert_series_equal(ser, expected)
tm.assert_series_equal(result_view["A"], expected)


class TestValidate:
Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,10 +398,6 @@ def test_setitem_series(self, data, full_indexer):
def test_setitem_frame_2d_values(self, data):
# GH#44514
df = pd.DataFrame({"A": data})
using_copy_on_write = pd.options.mode.copy_on_write

blk_data = df._mgr.arrays[0]

orig = df.copy()

df.iloc[:] = df.copy()
Expand All @@ -412,9 +408,6 @@ def test_setitem_frame_2d_values(self, data):

df.iloc[:] = df.values
tm.assert_frame_equal(df, orig)
if not using_copy_on_write:
# GH#33457 Check that this setting occurred in-place
assert df._mgr.arrays[0] is blk_data

df.iloc[:-1] = df.values[:-1]
tm.assert_frame_equal(df, orig)
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/extension/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,3 @@ def invalid_scalar(data):
If the array can hold any item (i.e. object dtype), then use pytest.skip.
"""
return object.__new__(object)


@pytest.fixture
def using_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
"""
return True
23 changes: 5 additions & 18 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,18 +245,6 @@ def test_fillna_series_method(self, data_missing, fillna_method):
):
super().test_fillna_series_method(data_missing, fillna_method)

def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
warn = DeprecationWarning if not using_copy_on_write else None
msg = "ExtensionArray.fillna added a 'copy' keyword"
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
super().test_fillna_copy_frame(data_missing)

def test_fillna_copy_series(self, data_missing, using_copy_on_write):
warn = DeprecationWarning if not using_copy_on_write else None
msg = "ExtensionArray.fillna added a 'copy' keyword"
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
super().test_fillna_copy_series(data_missing)

@pytest.mark.parametrize("dropna", [True, False])
def test_value_counts(self, all_data, dropna):
all_data = all_data[:10]
Expand Down Expand Up @@ -554,12 +542,11 @@ def test_to_numpy_keyword():
tm.assert_numpy_array_equal(result, expected)


def test_array_copy_on_write(using_copy_on_write):
def test_array_copy_on_write():
df = pd.DataFrame({"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype="object")
df2 = df.astype(DecimalDtype())
df.iloc[0, 0] = 0
if using_copy_on_write:
expected = pd.DataFrame(
{"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
)
tm.assert_equal(df2.values, expected.values)
expected = pd.DataFrame(
{"a": [decimal.Decimal(2), decimal.Decimal(3)]}, dtype=DecimalDtype()
)
tm.assert_equal(df2.values, expected.values)
8 changes: 2 additions & 6 deletions pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,8 @@ def test_equals(self, data, na_value, as_series):
def test_fillna_copy_frame(self, data_missing):
super().test_fillna_copy_frame(data_missing)

def test_equals_same_data_different_object(
self, data, using_copy_on_write, request
):
if using_copy_on_write:
mark = pytest.mark.xfail(reason="Fails with CoW")
request.applymarker(mark)
@pytest.mark.xfail(reason="Fails with CoW")
def test_equals_same_data_different_object(self, data, request):
super().test_equals_same_data_different_object(data)

@pytest.mark.xfail(reason="failing on np.array(self, dtype=str)")
Expand Down
15 changes: 4 additions & 11 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,32 +277,25 @@ def test_fillna_frame(self, data_missing):

_combine_le_expected_dtype = "Sparse[bool]"

def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
def test_fillna_copy_frame(self, data_missing):
arr = data_missing.take([1, 1])
df = pd.DataFrame({"A": arr}, copy=False)

filled_val = df.iloc[0, 0]
result = df.fillna(filled_val)

if hasattr(df._mgr, "blocks"):
if using_copy_on_write:
assert df.values.base is result.values.base
else:
assert df.values.base is not result.values.base
assert df.values.base is result.values.base
assert df.A._values.to_dense() is arr.to_dense()

def test_fillna_copy_series(self, data_missing, using_copy_on_write):
def test_fillna_copy_series(self, data_missing):
arr = data_missing.take([1, 1])
ser = pd.Series(arr, copy=False)

filled_val = ser[0]
result = ser.fillna(filled_val)

if using_copy_on_write:
assert ser._values is result._values

else:
assert ser._values is not result._values
assert ser._values is result._values
assert ser._values.to_dense() is arr.to_dense()

@pytest.mark.xfail(reason="Not Applicable")
Expand Down
19 changes: 3 additions & 16 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def test_groupby_raises_timedelta(func):

@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category(
how, by, groupby_series, groupby_func, using_copy_on_write, df_with_cat_col
how, by, groupby_series, groupby_func, df_with_cat_col
):
# GH#50749
df = df_with_cat_col
Expand Down Expand Up @@ -416,13 +416,7 @@ def test_groupby_raises_category(
r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'",
),
"ffill": (None, ""),
"fillna": (
TypeError,
r"Cannot setitem on a Categorical with a new category \(0\), "
"set the categories first",
)
if not using_copy_on_write
else (None, ""), # no-op with CoW
"fillna": (None, ""), # no-op with CoW
"first": (None, ""),
"idxmax": (None, ""),
"idxmin": (None, ""),
Expand Down Expand Up @@ -555,7 +549,6 @@ def test_groupby_raises_category_on_category(
groupby_series,
groupby_func,
observed,
using_copy_on_write,
df_with_cat_col,
):
# GH#50749
Expand Down Expand Up @@ -616,13 +609,7 @@ def test_groupby_raises_category_on_category(
),
"diff": (TypeError, "unsupported operand type"),
"ffill": (None, ""),
"fillna": (
TypeError,
r"Cannot setitem on a Categorical with a new category \(0\), "
"set the categories first",
)
if not using_copy_on_write
else (None, ""), # no-op with CoW
"fillna": (None, ""), # no-op with CoW
"first": (None, ""),
"idxmax": (ValueError, "empty group due to unobserved categories")
if empty_groups
Expand Down
29 changes: 8 additions & 21 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,15 @@ def test_append_concat(self):
assert isinstance(result.index, PeriodIndex)
assert result.index[0] == s1.index[0]

def test_concat_copy(self, using_copy_on_write):
def test_concat_copy(self):
df = DataFrame(np.random.default_rng(2).standard_normal((4, 3)))
df2 = DataFrame(np.random.default_rng(2).integers(0, 10, size=4).reshape(4, 1))
df3 = DataFrame({5: "foo"}, index=range(4))

# These are actual copies.
result = concat([df, df2, df3], axis=1, copy=True)

if not using_copy_on_write:
for arr in result._mgr.arrays:
assert not any(
np.shares_memory(arr, y)
for x in [df, df2, df3]
for y in x._mgr.arrays
)
else:
for arr in result._mgr.arrays:
assert arr.base is not None
for arr in result._mgr.arrays:
assert arr.base is not None

# These are the same.
result = concat([df, df2, df3], axis=1, copy=False)
Expand All @@ -78,15 +69,11 @@ def test_concat_copy(self, using_copy_on_write):
result = concat([df, df2, df3, df4], axis=1, copy=False)
for arr in result._mgr.arrays:
if arr.dtype.kind == "f":
if using_copy_on_write:
# this is a view on some array in either df or df4
assert any(
np.shares_memory(arr, other)
for other in df._mgr.arrays + df4._mgr.arrays
)
else:
# the block was consolidated, so we got a copy anyway
assert arr.base is None
# this is a view on some array in either df or df4
assert any(
np.shares_memory(arr, other)
for other in df._mgr.arrays + df4._mgr.arrays
)
elif arr.dtype.kind in ["i", "u"]:
assert arr.base is df2._mgr.arrays[0].base
elif arr.dtype == object:
Expand Down
13 changes: 0 additions & 13 deletions pandas/tests/reshape/concat/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,19 +192,6 @@ def test_concat_duplicates_in_index_with_keys(self):
tm.assert_frame_equal(result, expected)
tm.assert_index_equal(result.index.levels[1], Index([1, 3], name="date"))

@pytest.mark.parametrize("ignore_index", [True, False])
@pytest.mark.parametrize("order", ["C", "F"])
def test_concat_copies(self, axis, order, ignore_index, using_copy_on_write):
# based on asv ConcatDataFrames
df = DataFrame(np.zeros((10, 5), dtype=np.float32, order=order))

res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True)

if not using_copy_on_write:
for arr in res._iter_column_arrays():
for arr2 in df._iter_column_arrays():
assert not np.shares_memory(arr, arr2)

def test_outer_sort_columns(self):
# GH#47127
df1 = DataFrame({"A": [0], "B": [1], 0: 1})
Expand Down
11 changes: 4 additions & 7 deletions pandas/tests/reshape/concat/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,23 +100,20 @@ def test_concat_rename_index(self):
tm.assert_frame_equal(result, exp)
assert result.index.names == exp.index.names

def test_concat_copy_index_series(self, axis, using_copy_on_write):
def test_concat_copy_index_series(self, axis):
# GH 29879
ser = Series([1, 2])
comb = concat([ser, ser], axis=axis, copy=True)
if not using_copy_on_write or axis in [0, "index"]:
if axis in [0, "index"]:
assert comb.index is not ser.index
else:
assert comb.index is ser.index

def test_concat_copy_index_frame(self, axis, using_copy_on_write):
def test_concat_copy_index_frame(self, axis):
# GH 29879
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
comb = concat([df, df], axis=axis, copy=True)
if not using_copy_on_write:
assert not comb.index.is_(df.index)
assert not comb.columns.is_(df.columns)
elif axis in [0, "index"]:
if axis in [0, "index"]:
assert not comb.index.is_(df.index)
assert comb.columns.is_(df.columns)
elif axis in [1, "columns"]:
Expand Down