CoW: Remove copy-keywords as far as possible #57327

Merged: 2 commits, Feb 10, 2024

2 changes: 1 addition & 1 deletion pandas/core/base.py
@@ -218,7 +218,7 @@ def _obj_with_exclusions(self):
return self.obj

if self._selection is not None:
return self.obj._getitem_nocopy(self._selection_list)
return self.obj[self._selection_list]

if len(self.exclusions) > 0:
# equivalent to `self.obj.drop(self.exclusions, axis=1)
87 changes: 28 additions & 59 deletions pandas/core/frame.py
@@ -1676,8 +1676,8 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series:
if len(common) > len(self.columns) or len(common) > len(other.index):
raise ValueError("matrices are not aligned")

left = self.reindex(columns=common, copy=False)
right = other.reindex(index=common, copy=False)
left = self.reindex(columns=common)
right = other.reindex(index=common)
lvals = left.values
rvals = right._values
else:
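For context: this is the pattern repeated throughout the diff. With Copy-on-Write, methods such as `reindex`, `align`, `astype`, and `merge` already return lazy copies that share data until either side is modified, so `copy=False` no longer buys anything; the public signatures (e.g. `set_axis` below) still accept `copy` but no longer forward it. A minimal sketch of the behaviour being relied on, assuming pandas 2.x with the `copy_on_write` option enabled (slated to become the default in pandas 3.0):

```python
import numpy as np
import pandas as pd

pd.set_option("mode.copy_on_write", True)

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
left = df.reindex(columns=["a", "b"])  # no copy keyword needed: the result is a lazy copy
# the reindexed frame still shares its column buffers with the original
assert np.shares_memory(df["a"].to_numpy(), left["a"].to_numpy())
```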
@@ -3800,27 +3800,6 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]:
for i in range(len(self.columns)):
yield self._get_column_array(i)

def _getitem_nocopy(self, key: list):
"""
Behaves like __getitem__, but returns a view in cases where __getitem__
would make a copy.
"""
# TODO(CoW): can be removed if/when we are always Copy-on-Write
indexer = self.columns._get_indexer_strict(key, "columns")[1]
new_axis = self.columns[indexer]

new_mgr = self._mgr.reindex_indexer(
new_axis,
indexer,
axis=0,
allow_dups=True,
copy=False,
only_slice=True,
)
result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
result = result.__finalize__(self)
return result

def __getitem__(self, key):
check_dict_or_set_indexers(key)
key = lib.item_from_zerodim(key)
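For context: the removed `_getitem_nocopy` helper existed only to avoid the eager copy that `__getitem__` used to make for a list of columns, and its own TODO notes it can go once Copy-on-Write is always active. Under CoW, a plain `df[list_of_columns]` (as now used in `pandas/core/base.py` above) already returns a lazily-copied object. A minimal sketch of that behaviour, assuming pandas 2.x with CoW enabled:

```python
import pandas as pd

pd.set_option("mode.copy_on_write", True)

df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})
sub = df[["a"]]             # no eager copy: the column buffer is shared for now
sub.iloc[0, 0] = 100        # data is copied only here, when the child is mutated
assert df.loc[0, "a"] == 1  # the parent DataFrame is unaffected
```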
@@ -3911,7 +3890,7 @@ def _getitem_bool_array(self, key):
key = check_bool_indexer(self.index, key)

if key.all():
return self.copy(deep=None)
return self.copy(deep=False)

indexer = key.nonzero()[0]
return self.take(indexer, axis=0)
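For context: `deep=None` was the transitional spelling meaning "lazy copy when Copy-on-Write is enabled, full copy otherwise" (see the removed GH 49473 comment in `set_index` further down). With CoW always active, `copy(deep=False)` is exactly that lazy copy: nothing is duplicated until one of the two objects is modified. A minimal sketch, assuming pandas 2.x with CoW enabled:

```python
import pandas as pd

pd.set_option("mode.copy_on_write", True)

df = pd.DataFrame({"a": [1, 2, 3]})
lazy = df.copy(deep=False)   # a lazy copy: buffers are shared, nothing duplicated yet
lazy.loc[0, "a"] = 99        # the shared buffer is copied only at this point
assert df.loc[0, "a"] == 1   # the original is untouched
```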
@@ -4774,7 +4753,7 @@ def predicate(arr: ArrayLike) -> bool:

return True

mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
mgr = self._mgr._get_data_subset(predicate).copy(deep=False)
return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)

def insert(
@@ -4919,7 +4898,7 @@ def assign(self, **kwargs) -> DataFrame:
Portland 17.0 62.6 290.15
Berkeley 25.0 77.0 298.15
"""
data = self.copy(deep=None)
data = self.copy(deep=False)

for k, v in kwargs.items():
data[k] = com.apply_if_callable(v, data)
@@ -4996,7 +4975,6 @@ def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame:
else:
return self._reindex_with_indexers(
{0: [new_index, row_indexer], 1: [new_columns, col_indexer]},
copy=False,
fill_value=fill_value,
)

@@ -5038,7 +5016,7 @@ def set_axis(
axis: Axis = 0,
copy: bool | None = None,
) -> DataFrame:
return super().set_axis(labels, axis=axis, copy=copy)
return super().set_axis(labels, axis=axis)

@doc(
NDFrame.reindex,
@@ -5065,7 +5043,6 @@ def reindex(
columns=columns,
axis=axis,
method=method,
copy=copy,
level=level,
fill_value=fill_value,
limit=limit,
@@ -5463,7 +5440,6 @@ def rename(
index=index,
columns=columns,
axis=axis,
copy=copy,
inplace=inplace,
level=level,
errors=errors,
@@ -5534,7 +5510,7 @@ def _replace_columnwise(
DataFrame or None
"""
# Operate column-wise
res = self if inplace else self.copy(deep=None)
res = self if inplace else self.copy(deep=False)
ax = self.columns

for i, ax_value in enumerate(ax):
@@ -5823,8 +5799,7 @@ def set_index(
if inplace:
frame = self
else:
# GH 49473 Use "lazy copy" with Copy-on-Write
frame = self.copy(deep=None)
frame = self.copy(deep=False)

arrays: list[Index] = []
names: list[Hashable] = []
@@ -6114,7 +6089,7 @@ class max type
if inplace:
new_obj = self
else:
new_obj = self.copy(deep=None)
new_obj = self.copy(deep=False)
if allow_duplicates is not lib.no_default:
allow_duplicates = validate_bool_kwarg(allow_duplicates, "allow_duplicates")

@@ -6386,7 +6361,7 @@ def dropna(
raise ValueError(f"invalid how option: {how}")

if np.all(mask):
result = self.copy(deep=None)
result = self.copy(deep=False)
else:
result = self.loc(axis=axis)[mask]

@@ -6515,7 +6490,7 @@ def drop_duplicates(
4 Indomie pack 5.0
"""
if self.empty:
return self.copy(deep=None)
return self.copy(deep=False)

inplace = validate_bool_kwarg(inplace, "inplace")
ignore_index = validate_bool_kwarg(ignore_index, "ignore_index")
@@ -6631,7 +6606,7 @@ def duplicated(

def f(vals) -> tuple[np.ndarray, int]:
labels, shape = algorithms.factorize(vals, size_hint=len(self))
return labels.astype("i8", copy=False), len(shape)
return labels.astype("i8"), len(shape)

if subset is None:
# https://github.com/pandas-dev/pandas/issues/28770
@@ -6914,7 +6889,7 @@ def sort_values(
if inplace:
return self._update_inplace(self)
else:
return self.copy(deep=None)
return self.copy(deep=False)

if is_range_indexer(indexer, len(indexer)):
result = self.copy(deep=False)
@@ -7570,7 +7545,7 @@ def nsmallest(
),
)
def swaplevel(self, i: Axis = -2, j: Axis = -1, axis: Axis = 0) -> DataFrame:
result = self.copy(deep=None)
result = self.copy(deep=False)

axis = self._get_axis_number(axis)

@@ -7630,7 +7605,7 @@ class diet
if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover
raise TypeError("Can only reorder levels on a hierarchical axis.")

result = self.copy(deep=None)
result = self.copy(deep=False)

if axis == 0:
assert isinstance(result.index, MultiIndex)
@@ -7933,9 +7908,7 @@ def to_series(right):
if flex is not None and isinstance(right, DataFrame):
if not left._indexed_same(right):
if flex:
left, right = left.align(
right, join="outer", level=level, copy=False
)
left, right = left.align(right, join="outer", level=level)
else:
raise ValueError(
"Can only compare identically-labeled (both index and columns) "
@@ -7948,7 +7921,7 @@ def to_series(right):
if not left.axes[axis].equals(right.index):
raise ValueError(
"Operands are not aligned. Do "
"`left, right = left.align(right, axis=1, copy=False)` "
"`left, right = left.align(right, axis=1)` "
"before operating."
)
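For context: the message now recommends `left.align(right, axis=1)` without `copy=False`, since under CoW the aligned results share data with the inputs anyway. A small self-contained illustration of that recommended call, using hypothetical frames and assuming CoW is enabled:

```python
import pandas as pd

pd.set_option("mode.copy_on_write", True)

left = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
right = pd.Series([10, 20, 30], index=["a", "b", "c"])

# Align the Series to the frame's columns before operating, as the message suggests;
# no copy keyword is needed to avoid duplicating data under CoW.
left_aligned, right_aligned = left.align(right, axis=1)
print(left_aligned.columns.tolist())  # ['a', 'b', 'c']
```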

@@ -7957,7 +7930,6 @@ def to_series(right):
join="outer",
axis=axis,
level=level,
copy=False,
)
right = left._maybe_align_series_as_frame(right, axis)

@@ -8467,7 +8439,7 @@ def combine(
"""
other_idxlen = len(other.index) # save for compare

this, other = self.align(other, copy=False)
this, other = self.align(other)
new_index = this.index

if other.empty and len(new_index) == len(self.index):
@@ -8507,15 +8479,15 @@ def combine(
# try to promote series, which is all NaN, as other_dtype.
new_dtype = other_dtype
try:
series = series.astype(new_dtype, copy=False)
series = series.astype(new_dtype)
except ValueError:
# e.g. new_dtype is integer types
pass
else:
# if we have different dtypes, possibly promote
new_dtype = find_common_type([this_dtype, other_dtype])
series = series.astype(new_dtype, copy=False)
other_series = other_series.astype(new_dtype, copy=False)
series = series.astype(new_dtype)
other_series = other_series.astype(new_dtype)

arr = func(series, other_series)
if isinstance(new_dtype, np.dtype):
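For context: these casts in `combine` previously passed `copy=False` so that a no-op cast (target dtype already matches) would not duplicate the column. Under CoW, `astype` to an identical dtype already returns a lazy copy, so the keyword is redundant here too. A minimal sketch, assuming CoW is enabled:

```python
import numpy as np
import pandas as pd

pd.set_option("mode.copy_on_write", True)

s = pd.Series([1, 2, 3], dtype="int64")
same = s.astype("int64")  # no actual cast is needed, so no data is duplicated
assert np.shares_memory(s.to_numpy(), same.to_numpy())
```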
@@ -9567,7 +9539,7 @@ def explode(
result.index = default_index(len(result))
else:
result.index = self.index.take(result.index)
result = result.reindex(columns=self.columns, copy=False)
result = result.reindex(columns=self.columns)

return result.__finalize__(self, method="explode")

@@ -10263,9 +10235,7 @@ def _append(
row_df = other.to_frame().T
# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
other = row_df.infer_objects(copy=False).rename_axis(
index.names, copy=False
)
other = row_df.infer_objects().rename_axis(index.names)
elif isinstance(other, list):
if not other:
pass
@@ -10509,7 +10479,7 @@ def join(
res = concat(
frames, axis=1, join="outer", verify_integrity=True, sort=sort
)
return res.reindex(self.index, copy=False)
return res.reindex(self.index)
else:
return concat(
frames, axis=1, join=how, verify_integrity=True, sort=sort
@@ -10559,7 +10529,6 @@ def merge(
right_index=right_index,
sort=sort,
suffixes=suffixes,
copy=copy,
indicator=indicator,
validate=validate,
)
@@ -11024,7 +10993,7 @@ def corrwith(

if numeric_only:
other = other._get_numeric_data()
left, right = this.align(other, join="inner", copy=False)
left, right = this.align(other, join="inner")

if axis == 1:
left = left.T
@@ -11161,7 +11130,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
else:
result = notna(frame).sum(axis=axis)

return result.astype("int64", copy=False).__finalize__(self, method="count")
return result.astype("int64").__finalize__(self, method="count")

def _reduce(
self,
@@ -11225,7 +11194,7 @@ def _get_data() -> DataFrame:
if axis is None:
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
if isinstance(dtype, ExtensionDtype):
df = df.astype(dtype, copy=False)
df = df.astype(dtype)
arr = concat_compat(list(df._iter_column_arrays()))
return arr._reduce(name, skipna=skipna, keepdims=False, **kwds)
return func(df.values)
Expand Down Expand Up @@ -11257,7 +11226,7 @@ def _get_data() -> DataFrame:
# be equivalent to transposing the original frame and aggregating
# with axis=0.
name = {"argmax": "idxmax", "argmin": "idxmin"}.get(name, name)
df = df.astype(dtype, copy=False)
df = df.astype(dtype)
arr = concat_compat(list(df._iter_column_arrays()))
nrows, ncols = df.shape
row_index = np.tile(np.arange(nrows), ncols)