Skip to content

Commit c98b7c8

Browse files
authored
CoW: Set copy=False in internal usages of Series/DataFrame constructors (#51834)
1 parent 74e8c00 commit c98b7c8

File tree

3 files changed: +56 -33 lines changed

pandas/core/frame.py

+29 -14
Original file line number | Diff line number | Diff line change
@@ -1603,16 +1603,21 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series:
16031603

16041604
if isinstance(other, DataFrame):
16051605
return self._constructor(
1606-
np.dot(lvals, rvals), index=left.index, columns=other.columns
1606+
np.dot(lvals, rvals),
1607+
index=left.index,
1608+
columns=other.columns,
1609+
copy=False,
16071610
)
16081611
elif isinstance(other, Series):
1609-
return self._constructor_sliced(np.dot(lvals, rvals), index=left.index)
1612+
return self._constructor_sliced(
1613+
np.dot(lvals, rvals), index=left.index, copy=False
1614+
)
16101615
elif isinstance(rvals, (np.ndarray, Index)):
16111616
result = np.dot(lvals, rvals)
16121617
if result.ndim == 2:
1613-
return self._constructor(result, index=left.index)
1618+
return self._constructor(result, index=left.index, copy=False)
16141619
else:
1615-
return self._constructor_sliced(result, index=left.index)
1620+
return self._constructor_sliced(result, index=left.index, copy=False)
16161621
else: # pragma: no cover
16171622
raise TypeError(f"unsupported type: {type(other)}")
16181623

@@ -3610,10 +3615,15 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
36103615

36113616
else:
36123617
new_arr = self.values.T
3613-
if copy:
3618+
if copy and not using_copy_on_write():
36143619
new_arr = new_arr.copy()
36153620
result = self._constructor(
3616-
new_arr, index=self.columns, columns=self.index, dtype=new_arr.dtype
3621+
new_arr,
3622+
index=self.columns,
3623+
columns=self.index,
3624+
dtype=new_arr.dtype,
3625+
# We already made a copy (more than one block)
3626+
copy=False,
36173627
)
36183628

36193629
return result.__finalize__(self, method="transpose")
@@ -3839,7 +3849,7 @@ def _getitem_multilevel(self, key):
38393849
else:
38403850
new_values = self._values[:, loc]
38413851
result = self._constructor(
3842-
new_values, index=self.index, columns=result_columns
3852+
new_values, index=self.index, columns=result_columns, copy=False
38433853
)
38443854
if using_copy_on_write() and isinstance(loc, slice):
38453855
result._mgr.add_references(self._mgr) # type: ignore[arg-type]
@@ -4079,7 +4089,7 @@ def _setitem_frame(self, key, value):
40794089
if isinstance(key, np.ndarray):
40804090
if key.shape != self.shape:
40814091
raise ValueError("Array conditional must be same shape as self")
4082-
key = self._constructor(key, **self._construct_axes_dict())
4092+
key = self._constructor(key, **self._construct_axes_dict(), copy=False)
40834093

40844094
if key.size and not all(is_bool_dtype(dtype) for dtype in key.dtypes):
40854095
raise TypeError(
@@ -4997,7 +5007,9 @@ def _reindex_multi(
49975007
# condition more specific.
49985008
indexer = row_indexer, col_indexer
49995009
new_values = take_2d_multi(self.values, indexer, fill_value=fill_value)
5000-
return self._constructor(new_values, index=new_index, columns=new_columns)
5010+
return self._constructor(
5011+
new_values, index=new_index, columns=new_columns, copy=False
5012+
)
50015013
else:
50025014
return self._reindex_with_indexers(
50035015
{0: [new_index, row_indexer], 1: [new_columns, col_indexer]},
@@ -10527,7 +10539,7 @@ def corr(
1052710539
f"'{method}' was supplied"
1052810540
)
1052910541

10530-
result = self._constructor(correl, index=idx, columns=cols)
10542+
result = self._constructor(correl, index=idx, columns=cols, copy=False)
1053110543
return result.__finalize__(self, method="corr")
1053210544

1053310545
def cov(
@@ -10658,7 +10670,7 @@ def cov(
1065810670
else:
1065910671
base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)
1066010672

10661-
result = self._constructor(base_cov, index=idx, columns=cols)
10673+
result = self._constructor(base_cov, index=idx, columns=cols, copy=False)
1066210674
return result.__finalize__(self, method="cov")
1066310675

1066410676
def corrwith(
@@ -10771,7 +10783,9 @@ def c(x):
1077110783
return nanops.nancorr(x[0], x[1], method=method)
1077210784

1077310785
correl = self._constructor_sliced(
10774-
map(c, zip(left.values.T, right.values.T)), index=left.columns
10786+
map(c, zip(left.values.T, right.values.T)),
10787+
index=left.columns,
10788+
copy=False,
1077510789
)
1077610790

1077710791
else:
@@ -10882,7 +10896,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
1088210896
series_counts = notna(frame).sum(axis=axis)
1088310897
counts = series_counts._values
1088410898
result = self._constructor_sliced(
10885-
counts, index=frame._get_agg_axis(axis)
10899+
counts, index=frame._get_agg_axis(axis), copy=False
1088610900
)
1088710901

1088810902
return result.astype("int64").__finalize__(self, method="count")
@@ -10991,7 +11005,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
1099111005
middle = func(arr, axis=0, skipna=skipna)
1099211006
result = ufunc(result, middle)
1099311007

10994-
res_ser = self._constructor_sliced(result, index=self.index)
11008+
res_ser = self._constructor_sliced(result, index=self.index, copy=False)
1099511009
return res_ser
1099611010

1099711011
def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
@@ -11673,6 +11687,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
1167311687
).reshape(self.shape),
1167411688
self.index,
1167511689
self.columns,
11690+
copy=False,
1167611691
)
1167711692
return result.__finalize__(self, method="isin")
1167811693

pandas/core/generic.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -779,6 +779,8 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self
779779
return self._constructor(
780780
new_values,
781781
*new_axes,
782+
# The no-copy case for CoW is handled above
783+
copy=False,
782784
).__finalize__(self, method="swapaxes")
783785

784786
@final
@@ -9713,7 +9715,7 @@ def _where(
97139715
cond = np.asanyarray(cond)
97149716
if cond.shape != self.shape:
97159717
raise ValueError("Array conditional must be same shape as self")
9716-
cond = self._constructor(cond, **self._construct_axes_dict())
9718+
cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)
97179719

97189720
# make sure we are boolean
97199721
fill_value = bool(inplace)
@@ -9794,7 +9796,9 @@ def _where(
97949796

97959797
# we are the same shape, so create an actual object for alignment
97969798
else:
9797-
other = self._constructor(other, **self._construct_axes_dict())
9799+
other = self._constructor(
9800+
other, **self._construct_axes_dict(), copy=False
9801+
)
97989802

97999803
if axis is None:
98009804
axis = 0

pandas/core/series.py

+21 -17
Original file line number | Diff line number | Diff line change
@@ -843,7 +843,7 @@ def view(self, dtype: Dtype | None = None) -> Series:
843843
# self.array instead of self._values so we piggyback on PandasArray
844844
# implementation
845845
res_values = self.array.view(dtype)
846-
res_ser = self._constructor(res_values, index=self.index)
846+
res_ser = self._constructor(res_values, index=self.index, copy=False)
847847
if isinstance(res_ser._mgr, SingleBlockManager) and using_copy_on_write():
848848
blk = res_ser._mgr._block
849849
blk.refs = cast("BlockValuesRefs", self._references)
@@ -1044,7 +1044,7 @@ def _get_values_tuple(self, key: tuple):
10441044

10451045
# If key is contained, would have returned by now
10461046
indexer, new_index = self.index.get_loc_level(key)
1047-
new_ser = self._constructor(self._values[indexer], index=new_index)
1047+
new_ser = self._constructor(self._values[indexer], index=new_index, copy=False)
10481048
if using_copy_on_write() and isinstance(indexer, slice):
10491049
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
10501050
return new_ser.__finalize__(self)
@@ -1084,7 +1084,9 @@ def _get_value(self, label, takeable: bool = False):
10841084

10851085
new_index = mi[loc]
10861086
new_index = maybe_droplevels(new_index, label)
1087-
new_ser = self._constructor(new_values, index=new_index, name=self.name)
1087+
new_ser = self._constructor(
1088+
new_values, index=new_index, name=self.name, copy=False
1089+
)
10881090
if using_copy_on_write() and isinstance(loc, slice):
10891091
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
10901092
return new_ser.__finalize__(self)
@@ -1384,7 +1386,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series:
13841386
nv.validate_repeat((), {"axis": axis})
13851387
new_index = self.index.repeat(repeats)
13861388
new_values = self._values.repeat(repeats)
1387-
return self._constructor(new_values, index=new_index).__finalize__(
1389+
return self._constructor(new_values, index=new_index, copy=False).__finalize__(
13881390
self, method="repeat"
13891391
)
13901392

@@ -1550,7 +1552,7 @@ def reset_index(
15501552
self.index = new_index
15511553
else:
15521554
return self._constructor(
1553-
self._values.copy(), index=new_index
1555+
self._values.copy(), index=new_index, copy=False
15541556
).__finalize__(self, method="reset_index")
15551557
elif inplace:
15561558
raise TypeError(
@@ -2072,7 +2074,7 @@ def mode(self, dropna: bool = True) -> Series:
20722074

20732075
# Ensure index is type stable (should always use int index)
20742076
return self._constructor(
2075-
res_values, index=range(len(res_values)), name=self.name
2077+
res_values, index=range(len(res_values)), name=self.name, copy=False
20762078
)
20772079

20782080
def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation
@@ -2336,7 +2338,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series:
23362338
dtype: bool
23372339
"""
23382340
res = self._duplicated(keep=keep)
2339-
result = self._constructor(res, index=self.index)
2341+
result = self._constructor(res, index=self.index, copy=False)
23402342
return result.__finalize__(self, method="duplicated")
23412343

23422344
def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
@@ -2514,7 +2516,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series:
25142516
"""
25152517
nv.validate_round(args, kwargs)
25162518
result = self._values.round(decimals)
2517-
result = self._constructor(result, index=self.index).__finalize__(
2519+
result = self._constructor(result, index=self.index, copy=False).__finalize__(
25182520
self, method="round"
25192521
)
25202522

@@ -2820,7 +2822,7 @@ def diff(self, periods: int = 1) -> Series:
28202822
{examples}
28212823
"""
28222824
result = algorithms.diff(self._values, periods)
2823-
return self._constructor(result, index=self.index).__finalize__(
2825+
return self._constructor(result, index=self.index, copy=False).__finalize__(
28242826
self, method="diff"
28252827
)
28262828

@@ -2938,7 +2940,7 @@ def dot(self, other: AnyArrayLike) -> Series | np.ndarray:
29382940

29392941
if isinstance(other, ABCDataFrame):
29402942
return self._constructor(
2941-
np.dot(lvals, rvals), index=other.columns
2943+
np.dot(lvals, rvals), index=other.columns, copy=False
29422944
).__finalize__(self, method="dot")
29432945
elif isinstance(other, Series):
29442946
return np.dot(lvals, rvals)
@@ -3167,7 +3169,7 @@ def combine(
31673169
# try_float=False is to match agg_series
31683170
npvalues = lib.maybe_convert_objects(new_values, try_float=False)
31693171
res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
3170-
return self._constructor(res_values, index=new_index, name=new_name)
3172+
return self._constructor(res_values, index=new_index, name=new_name, copy=False)
31713173

31723174
def combine_first(self, other) -> Series:
31733175
"""
@@ -3528,7 +3530,7 @@ def sort_values(
35283530
return self.copy(deep=None)
35293531

35303532
result = self._constructor(
3531-
self._values[sorted_index], index=self.index[sorted_index]
3533+
self._values[sorted_index], index=self.index[sorted_index], copy=False
35323534
)
35333535

35343536
if ignore_index:
@@ -3776,7 +3778,9 @@ def argsort(
37763778
else:
37773779
result = np.argsort(values, kind=kind)
37783780

3779-
res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp)
3781+
res = self._constructor(
3782+
result, index=self.index, name=self.name, dtype=np.intp, copy=False
3783+
)
37803784
return res.__finalize__(self, method="argsort")
37813785

37823786
def nlargest(
@@ -4151,7 +4155,7 @@ def explode(self, ignore_index: bool = False) -> Series:
41514155
else:
41524156
index = self.index.repeat(counts)
41534157

4154-
return self._constructor(values, index=index, name=self.name)
4158+
return self._constructor(values, index=index, name=self.name, copy=False)
41554159

41564160
def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame:
41574161
"""
@@ -4282,7 +4286,7 @@ def map(
42824286
dtype: object
42834287
"""
42844288
new_values = self._map_values(arg, na_action=na_action)
4285-
return self._constructor(new_values, index=self.index).__finalize__(
4289+
return self._constructor(new_values, index=self.index, copy=False).__finalize__(
42864290
self, method="map"
42874291
)
42884292

@@ -4576,7 +4580,7 @@ def _reindex_indexer(
45764580
new_values = algorithms.take_nd(
45774581
self._values, indexer, allow_fill=True, fill_value=None
45784582
)
4579-
return self._constructor(new_values, index=new_index)
4583+
return self._constructor(new_values, index=new_index, copy=False)
45804584

45814585
def _needs_reindex_multi(self, axes, method, level) -> bool:
45824586
"""
@@ -5291,7 +5295,7 @@ def isin(self, values) -> Series:
52915295
dtype: bool
52925296
"""
52935297
result = algorithms.isin(self._values, values)
5294-
return self._constructor(result, index=self.index).__finalize__(
5298+
return self._constructor(result, index=self.index, copy=False).__finalize__(
52955299
self, method="isin"
52965300
)
52975301

0 commit comments

Comments
 (0)