Skip to content

Backport PR #51834: CoW: Set copy=False in internal usages of Series/DataFrame constructors #52012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1592,16 +1592,21 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series:

if isinstance(other, DataFrame):
return self._constructor(
np.dot(lvals, rvals), index=left.index, columns=other.columns
np.dot(lvals, rvals),
index=left.index,
columns=other.columns,
copy=False,
)
elif isinstance(other, Series):
return self._constructor_sliced(np.dot(lvals, rvals), index=left.index)
return self._constructor_sliced(
np.dot(lvals, rvals), index=left.index, copy=False
)
elif isinstance(rvals, (np.ndarray, Index)):
result = np.dot(lvals, rvals)
if result.ndim == 2:
return self._constructor(result, index=left.index)
return self._constructor(result, index=left.index, copy=False)
else:
return self._constructor_sliced(result, index=left.index)
return self._constructor_sliced(result, index=left.index, copy=False)
else: # pragma: no cover
raise TypeError(f"unsupported type: {type(other)}")

Expand Down Expand Up @@ -3571,9 +3576,15 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:

else:
new_arr = self.values.T
if copy:
if copy and not using_copy_on_write():
new_arr = new_arr.copy()
result = self._constructor(new_arr, index=self.columns, columns=self.index)
result = self._constructor(
new_arr,
index=self.columns,
columns=self.index,
# We already made a copy (more than one block)
copy=False,
)

return result.__finalize__(self, method="transpose")

Expand Down Expand Up @@ -3795,7 +3806,7 @@ def _getitem_multilevel(self, key):
else:
new_values = self._values[:, loc]
result = self._constructor(
new_values, index=self.index, columns=result_columns
new_values, index=self.index, columns=result_columns, copy=False
)
if using_copy_on_write() and isinstance(loc, slice):
result._mgr.add_references(self._mgr) # type: ignore[arg-type]
Expand Down Expand Up @@ -4029,7 +4040,7 @@ def _setitem_frame(self, key, value):
if isinstance(key, np.ndarray):
if key.shape != self.shape:
raise ValueError("Array conditional must be same shape as self")
key = self._constructor(key, **self._construct_axes_dict())
key = self._constructor(key, **self._construct_axes_dict(), copy=False)

if key.size and not all(is_bool_dtype(dtype) for dtype in key.dtypes):
raise TypeError(
Expand Down Expand Up @@ -4939,7 +4950,9 @@ def _reindex_multi(
# condition more specific.
indexer = row_indexer, col_indexer
new_values = take_2d_multi(self.values, indexer, fill_value=fill_value)
return self._constructor(new_values, index=new_index, columns=new_columns)
return self._constructor(
new_values, index=new_index, columns=new_columns, copy=False
)
else:
return self._reindex_with_indexers(
{0: [new_index, row_indexer], 1: [new_columns, col_indexer]},
Expand Down Expand Up @@ -10060,7 +10073,7 @@ def corr(
f"'{method}' was supplied"
)

result = self._constructor(correl, index=idx, columns=cols)
result = self._constructor(correl, index=idx, columns=cols, copy=False)
return result.__finalize__(self, method="corr")

def cov(
Expand Down Expand Up @@ -10191,7 +10204,7 @@ def cov(
else:
base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)

result = self._constructor(base_cov, index=idx, columns=cols)
result = self._constructor(base_cov, index=idx, columns=cols, copy=False)
return result.__finalize__(self, method="cov")

def corrwith(
Expand Down Expand Up @@ -10304,7 +10317,9 @@ def c(x):
return nanops.nancorr(x[0], x[1], method=method)

correl = self._constructor_sliced(
map(c, zip(left.values.T, right.values.T)), index=left.columns
map(c, zip(left.values.T, right.values.T)),
index=left.columns,
copy=False,
)

else:
Expand Down Expand Up @@ -10415,7 +10430,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
series_counts = notna(frame).sum(axis=axis)
counts = series_counts._values
result = self._constructor_sliced(
counts, index=frame._get_agg_axis(axis)
counts, index=frame._get_agg_axis(axis), copy=False
)

return result.astype("int64").__finalize__(self, method="count")
Expand Down Expand Up @@ -10524,7 +10539,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
middle = func(arr, axis=0, skipna=skipna)
result = ufunc(result, middle)

res_ser = self._constructor_sliced(result, index=self.index)
res_ser = self._constructor_sliced(result, index=self.index, copy=False)
return res_ser

def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
Expand Down Expand Up @@ -11206,6 +11221,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
).reshape(self.shape),
self.index,
self.columns,
copy=False,
)
return result.__finalize__(self, method="isin")

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ def swapaxes(
return self._constructor(
new_values,
*new_axes,
# The no-copy case for CoW is handled above
copy=False,
).__finalize__(self, method="swapaxes")

@final
Expand Down Expand Up @@ -9629,7 +9631,7 @@ def _where(
cond = np.asanyarray(cond)
if cond.shape != self.shape:
raise ValueError("Array conditional must be same shape as self")
cond = self._constructor(cond, **self._construct_axes_dict())
cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)

# make sure we are boolean
fill_value = bool(inplace)
Expand Down Expand Up @@ -9704,7 +9706,9 @@ def _where(

# we are the same shape, so create an actual object for alignment
else:
other = self._constructor(other, **self._construct_axes_dict())
other = self._constructor(
other, **self._construct_axes_dict(), copy=False
)

if axis is None:
axis = 0
Expand Down
38 changes: 21 additions & 17 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ def view(self, dtype: Dtype | None = None) -> Series:
# self.array instead of self._values so we piggyback on PandasArray
# implementation
res_values = self.array.view(dtype)
res_ser = self._constructor(res_values, index=self.index)
res_ser = self._constructor(res_values, index=self.index, copy=False)
if isinstance(res_ser._mgr, SingleBlockManager) and using_copy_on_write():
blk = res_ser._mgr._block
blk.refs = cast("BlockValuesRefs", self._references)
Expand Down Expand Up @@ -1073,7 +1073,7 @@ def _get_values_tuple(self, key: tuple):

# If key is contained, would have returned by now
indexer, new_index = self.index.get_loc_level(key)
new_ser = self._constructor(self._values[indexer], index=new_index)
new_ser = self._constructor(self._values[indexer], index=new_index, copy=False)
if using_copy_on_write() and isinstance(indexer, slice):
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
return new_ser.__finalize__(self)
Expand Down Expand Up @@ -1113,7 +1113,9 @@ def _get_value(self, label, takeable: bool = False):

new_index = mi[loc]
new_index = maybe_droplevels(new_index, label)
new_ser = self._constructor(new_values, index=new_index, name=self.name)
new_ser = self._constructor(
new_values, index=new_index, name=self.name, copy=False
)
if using_copy_on_write() and isinstance(loc, slice):
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
return new_ser.__finalize__(self)
Expand Down Expand Up @@ -1413,7 +1415,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series:
nv.validate_repeat((), {"axis": axis})
new_index = self.index.repeat(repeats)
new_values = self._values.repeat(repeats)
return self._constructor(new_values, index=new_index).__finalize__(
return self._constructor(new_values, index=new_index, copy=False).__finalize__(
self, method="repeat"
)

Expand Down Expand Up @@ -1579,7 +1581,7 @@ def reset_index(
self.index = new_index
else:
return self._constructor(
self._values.copy(), index=new_index
self._values.copy(), index=new_index, copy=False
).__finalize__(self, method="reset_index")
elif inplace:
raise TypeError(
Expand Down Expand Up @@ -2101,7 +2103,7 @@ def mode(self, dropna: bool = True) -> Series:

# Ensure index is type stable (should always use int index)
return self._constructor(
res_values, index=range(len(res_values)), name=self.name
res_values, index=range(len(res_values)), name=self.name, copy=False
)

def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation
Expand Down Expand Up @@ -2365,7 +2367,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series:
dtype: bool
"""
res = self._duplicated(keep=keep)
result = self._constructor(res, index=self.index)
result = self._constructor(res, index=self.index, copy=False)
return result.__finalize__(self, method="duplicated")

def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
Expand Down Expand Up @@ -2543,7 +2545,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series:
"""
nv.validate_round(args, kwargs)
result = self._values.round(decimals)
result = self._constructor(result, index=self.index).__finalize__(
result = self._constructor(result, index=self.index, copy=False).__finalize__(
self, method="round"
)

Expand Down Expand Up @@ -2844,7 +2846,7 @@ def diff(self, periods: int = 1) -> Series:
{examples}
"""
result = algorithms.diff(self._values, periods)
return self._constructor(result, index=self.index).__finalize__(
return self._constructor(result, index=self.index, copy=False).__finalize__(
self, method="diff"
)

Expand Down Expand Up @@ -2962,7 +2964,7 @@ def dot(self, other: AnyArrayLike) -> Series | np.ndarray:

if isinstance(other, ABCDataFrame):
return self._constructor(
np.dot(lvals, rvals), index=other.columns
np.dot(lvals, rvals), index=other.columns, copy=False
).__finalize__(self, method="dot")
elif isinstance(other, Series):
return np.dot(lvals, rvals)
Expand Down Expand Up @@ -3264,7 +3266,7 @@ def combine(
# try_float=False is to match agg_series
npvalues = lib.maybe_convert_objects(new_values, try_float=False)
res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
return self._constructor(res_values, index=new_index, name=new_name)
return self._constructor(res_values, index=new_index, name=new_name, copy=False)

def combine_first(self, other) -> Series:
"""
Expand Down Expand Up @@ -3615,7 +3617,7 @@ def sort_values(
return self.copy(deep=None)

result = self._constructor(
self._values[sorted_index], index=self.index[sorted_index]
self._values[sorted_index], index=self.index[sorted_index], copy=False
)

if ignore_index:
Expand Down Expand Up @@ -3863,7 +3865,9 @@ def argsort(
else:
result = np.argsort(values, kind=kind)

res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp)
res = self._constructor(
result, index=self.index, name=self.name, dtype=np.intp, copy=False
)
return res.__finalize__(self, method="argsort")

def nlargest(
Expand Down Expand Up @@ -4238,7 +4242,7 @@ def explode(self, ignore_index: bool = False) -> Series:
else:
index = self.index.repeat(counts)

return self._constructor(values, index=index, name=self.name)
return self._constructor(values, index=index, name=self.name, copy=False)

def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame:
"""
Expand Down Expand Up @@ -4369,7 +4373,7 @@ def map(
dtype: object
"""
new_values = self._map_values(arg, na_action=na_action)
return self._constructor(new_values, index=self.index).__finalize__(
return self._constructor(new_values, index=self.index, copy=False).__finalize__(
self, method="map"
)

Expand Down Expand Up @@ -4663,7 +4667,7 @@ def _reindex_indexer(
new_values = algorithms.take_nd(
self._values, indexer, allow_fill=True, fill_value=None
)
return self._constructor(new_values, index=new_index)
return self._constructor(new_values, index=new_index, copy=False)

def _needs_reindex_multi(self, axes, method, level) -> bool:
"""
Expand Down Expand Up @@ -5378,7 +5382,7 @@ def isin(self, values) -> Series:
dtype: bool
"""
result = algorithms.isin(self._values, values)
return self._constructor(result, index=self.index).__finalize__(
return self._constructor(result, index=self.index, copy=False).__finalize__(
self, method="isin"
)

Expand Down