Skip to content

Commit 7c45b9c

Browse files
phofl authored and jorisvandenbossche committed
CoW: Set copy=False in internal usages of Series/DataFrame constructors (pandas-dev#51834)
(cherry picked from commit c98b7c8)
1 parent 548c7f7 commit 7c45b9c

File tree

3 files changed: +57 -33 lines changed

3 files changed: +57 -33 lines changed

pandas/core/frame.py

+30 -14
Original file line number | Diff line number | Diff line change
@@ -1592,16 +1592,21 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series:
15921592

15931593
if isinstance(other, DataFrame):
15941594
return self._constructor(
1595-
np.dot(lvals, rvals), index=left.index, columns=other.columns
1595+
np.dot(lvals, rvals),
1596+
index=left.index,
1597+
columns=other.columns,
1598+
copy=False,
15961599
)
15971600
elif isinstance(other, Series):
1598-
return self._constructor_sliced(np.dot(lvals, rvals), index=left.index)
1601+
return self._constructor_sliced(
1602+
np.dot(lvals, rvals), index=left.index, copy=False
1603+
)
15991604
elif isinstance(rvals, (np.ndarray, Index)):
16001605
result = np.dot(lvals, rvals)
16011606
if result.ndim == 2:
1602-
return self._constructor(result, index=left.index)
1607+
return self._constructor(result, index=left.index, copy=False)
16031608
else:
1604-
return self._constructor_sliced(result, index=left.index)
1609+
return self._constructor_sliced(result, index=left.index, copy=False)
16051610
else: # pragma: no cover
16061611
raise TypeError(f"unsupported type: {type(other)}")
16071612

@@ -3571,9 +3576,15 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
35713576

35723577
else:
35733578
new_arr = self.values.T
3574-
if copy:
3579+
if copy and not using_copy_on_write():
35753580
new_arr = new_arr.copy()
3576-
result = self._constructor(new_arr, index=self.columns, columns=self.index)
3581+
result = self._constructor(
3582+
new_arr,
3583+
index=self.columns,
3584+
columns=self.index,
3585+
# We already made a copy (more than one block)
3586+
copy=False,
3587+
)
35773588

35783589
return result.__finalize__(self, method="transpose")
35793590

@@ -3795,7 +3806,7 @@ def _getitem_multilevel(self, key):
37953806
else:
37963807
new_values = self._values[:, loc]
37973808
result = self._constructor(
3798-
new_values, index=self.index, columns=result_columns
3809+
new_values, index=self.index, columns=result_columns, copy=False
37993810
)
38003811
if using_copy_on_write() and isinstance(loc, slice):
38013812
result._mgr.add_references(self._mgr) # type: ignore[arg-type]
@@ -4029,7 +4040,7 @@ def _setitem_frame(self, key, value):
40294040
if isinstance(key, np.ndarray):
40304041
if key.shape != self.shape:
40314042
raise ValueError("Array conditional must be same shape as self")
4032-
key = self._constructor(key, **self._construct_axes_dict())
4043+
key = self._constructor(key, **self._construct_axes_dict(), copy=False)
40334044

40344045
if key.size and not all(is_bool_dtype(dtype) for dtype in key.dtypes):
40354046
raise TypeError(
@@ -4939,7 +4950,9 @@ def _reindex_multi(
49394950
# condition more specific.
49404951
indexer = row_indexer, col_indexer
49414952
new_values = take_2d_multi(self.values, indexer, fill_value=fill_value)
4942-
return self._constructor(new_values, index=new_index, columns=new_columns)
4953+
return self._constructor(
4954+
new_values, index=new_index, columns=new_columns, copy=False
4955+
)
49434956
else:
49444957
return self._reindex_with_indexers(
49454958
{0: [new_index, row_indexer], 1: [new_columns, col_indexer]},
@@ -10060,7 +10073,7 @@ def corr(
1006010073
f"'{method}' was supplied"
1006110074
)
1006210075

10063-
result = self._constructor(correl, index=idx, columns=cols)
10076+
result = self._constructor(correl, index=idx, columns=cols, copy=False)
1006410077
return result.__finalize__(self, method="corr")
1006510078

1006610079
def cov(
@@ -10191,7 +10204,7 @@ def cov(
1019110204
else:
1019210205
base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)
1019310206

10194-
result = self._constructor(base_cov, index=idx, columns=cols)
10207+
result = self._constructor(base_cov, index=idx, columns=cols, copy=False)
1019510208
return result.__finalize__(self, method="cov")
1019610209

1019710210
def corrwith(
@@ -10304,7 +10317,9 @@ def c(x):
1030410317
return nanops.nancorr(x[0], x[1], method=method)
1030510318

1030610319
correl = self._constructor_sliced(
10307-
map(c, zip(left.values.T, right.values.T)), index=left.columns
10320+
map(c, zip(left.values.T, right.values.T)),
10321+
index=left.columns,
10322+
copy=False,
1030810323
)
1030910324

1031010325
else:
@@ -10415,7 +10430,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
1041510430
series_counts = notna(frame).sum(axis=axis)
1041610431
counts = series_counts._values
1041710432
result = self._constructor_sliced(
10418-
counts, index=frame._get_agg_axis(axis)
10433+
counts, index=frame._get_agg_axis(axis), copy=False
1041910434
)
1042010435

1042110436
return result.astype("int64").__finalize__(self, method="count")
@@ -10524,7 +10539,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
1052410539
middle = func(arr, axis=0, skipna=skipna)
1052510540
result = ufunc(result, middle)
1052610541

10527-
res_ser = self._constructor_sliced(result, index=self.index)
10542+
res_ser = self._constructor_sliced(result, index=self.index, copy=False)
1052810543
return res_ser
1052910544

1053010545
def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
@@ -11206,6 +11221,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
1120611221
).reshape(self.shape),
1120711222
self.index,
1120811223
self.columns,
11224+
copy=False,
1120911225
)
1121011226
return result.__finalize__(self, method="isin")
1121111227

pandas/core/generic.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -779,6 +779,8 @@ def swapaxes(
779779
return self._constructor(
780780
new_values,
781781
*new_axes,
782+
# The no-copy case for CoW is handled above
783+
copy=False,
782784
).__finalize__(self, method="swapaxes")
783785

784786
@final
@@ -9629,7 +9631,7 @@ def _where(
96299631
cond = np.asanyarray(cond)
96309632
if cond.shape != self.shape:
96319633
raise ValueError("Array conditional must be same shape as self")
9632-
cond = self._constructor(cond, **self._construct_axes_dict())
9634+
cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)
96339635

96349636
# make sure we are boolean
96359637
fill_value = bool(inplace)
@@ -9704,7 +9706,9 @@ def _where(
97049706

97059707
# we are the same shape, so create an actual object for alignment
97069708
else:
9707-
other = self._constructor(other, **self._construct_axes_dict())
9709+
other = self._constructor(
9710+
other, **self._construct_axes_dict(), copy=False
9711+
)
97089712

97099713
if axis is None:
97109714
axis = 0

pandas/core/series.py

+21 -17
Original file line number | Diff line number | Diff line change
@@ -840,7 +840,7 @@ def view(self, dtype: Dtype | None = None) -> Series:
840840
# self.array instead of self._values so we piggyback on PandasArray
841841
# implementation
842842
res_values = self.array.view(dtype)
843-
res_ser = self._constructor(res_values, index=self.index)
843+
res_ser = self._constructor(res_values, index=self.index, copy=False)
844844
if isinstance(res_ser._mgr, SingleBlockManager) and using_copy_on_write():
845845
blk = res_ser._mgr._block
846846
blk.refs = cast("BlockValuesRefs", self._references)
@@ -1073,7 +1073,7 @@ def _get_values_tuple(self, key: tuple):
10731073

10741074
# If key is contained, would have returned by now
10751075
indexer, new_index = self.index.get_loc_level(key)
1076-
new_ser = self._constructor(self._values[indexer], index=new_index)
1076+
new_ser = self._constructor(self._values[indexer], index=new_index, copy=False)
10771077
if using_copy_on_write() and isinstance(indexer, slice):
10781078
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
10791079
return new_ser.__finalize__(self)
@@ -1113,7 +1113,9 @@ def _get_value(self, label, takeable: bool = False):
11131113

11141114
new_index = mi[loc]
11151115
new_index = maybe_droplevels(new_index, label)
1116-
new_ser = self._constructor(new_values, index=new_index, name=self.name)
1116+
new_ser = self._constructor(
1117+
new_values, index=new_index, name=self.name, copy=False
1118+
)
11171119
if using_copy_on_write() and isinstance(loc, slice):
11181120
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
11191121
return new_ser.__finalize__(self)
@@ -1413,7 +1415,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series:
14131415
nv.validate_repeat((), {"axis": axis})
14141416
new_index = self.index.repeat(repeats)
14151417
new_values = self._values.repeat(repeats)
1416-
return self._constructor(new_values, index=new_index).__finalize__(
1418+
return self._constructor(new_values, index=new_index, copy=False).__finalize__(
14171419
self, method="repeat"
14181420
)
14191421

@@ -1579,7 +1581,7 @@ def reset_index(
15791581
self.index = new_index
15801582
else:
15811583
return self._constructor(
1582-
self._values.copy(), index=new_index
1584+
self._values.copy(), index=new_index, copy=False
15831585
).__finalize__(self, method="reset_index")
15841586
elif inplace:
15851587
raise TypeError(
@@ -2101,7 +2103,7 @@ def mode(self, dropna: bool = True) -> Series:
21012103

21022104
# Ensure index is type stable (should always use int index)
21032105
return self._constructor(
2104-
res_values, index=range(len(res_values)), name=self.name
2106+
res_values, index=range(len(res_values)), name=self.name, copy=False
21052107
)
21062108

21072109
def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation
@@ -2365,7 +2367,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series:
23652367
dtype: bool
23662368
"""
23672369
res = self._duplicated(keep=keep)
2368-
result = self._constructor(res, index=self.index)
2370+
result = self._constructor(res, index=self.index, copy=False)
23692371
return result.__finalize__(self, method="duplicated")
23702372

23712373
def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
@@ -2543,7 +2545,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series:
25432545
"""
25442546
nv.validate_round(args, kwargs)
25452547
result = self._values.round(decimals)
2546-
result = self._constructor(result, index=self.index).__finalize__(
2548+
result = self._constructor(result, index=self.index, copy=False).__finalize__(
25472549
self, method="round"
25482550
)
25492551

@@ -2844,7 +2846,7 @@ def diff(self, periods: int = 1) -> Series:
28442846
{examples}
28452847
"""
28462848
result = algorithms.diff(self._values, periods)
2847-
return self._constructor(result, index=self.index).__finalize__(
2849+
return self._constructor(result, index=self.index, copy=False).__finalize__(
28482850
self, method="diff"
28492851
)
28502852

@@ -2962,7 +2964,7 @@ def dot(self, other: AnyArrayLike) -> Series | np.ndarray:
29622964

29632965
if isinstance(other, ABCDataFrame):
29642966
return self._constructor(
2965-
np.dot(lvals, rvals), index=other.columns
2967+
np.dot(lvals, rvals), index=other.columns, copy=False
29662968
).__finalize__(self, method="dot")
29672969
elif isinstance(other, Series):
29682970
return np.dot(lvals, rvals)
@@ -3264,7 +3266,7 @@ def combine(
32643266
# try_float=False is to match agg_series
32653267
npvalues = lib.maybe_convert_objects(new_values, try_float=False)
32663268
res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
3267-
return self._constructor(res_values, index=new_index, name=new_name)
3269+
return self._constructor(res_values, index=new_index, name=new_name, copy=False)
32683270

32693271
def combine_first(self, other) -> Series:
32703272
"""
@@ -3615,7 +3617,7 @@ def sort_values(
36153617
return self.copy(deep=None)
36163618

36173619
result = self._constructor(
3618-
self._values[sorted_index], index=self.index[sorted_index]
3620+
self._values[sorted_index], index=self.index[sorted_index], copy=False
36193621
)
36203622

36213623
if ignore_index:
@@ -3863,7 +3865,9 @@ def argsort(
38633865
else:
38643866
result = np.argsort(values, kind=kind)
38653867

3866-
res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp)
3868+
res = self._constructor(
3869+
result, index=self.index, name=self.name, dtype=np.intp, copy=False
3870+
)
38673871
return res.__finalize__(self, method="argsort")
38683872

38693873
def nlargest(
@@ -4238,7 +4242,7 @@ def explode(self, ignore_index: bool = False) -> Series:
42384242
else:
42394243
index = self.index.repeat(counts)
42404244

4241-
return self._constructor(values, index=index, name=self.name)
4245+
return self._constructor(values, index=index, name=self.name, copy=False)
42424246

42434247
def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame:
42444248
"""
@@ -4369,7 +4373,7 @@ def map(
43694373
dtype: object
43704374
"""
43714375
new_values = self._map_values(arg, na_action=na_action)
4372-
return self._constructor(new_values, index=self.index).__finalize__(
4376+
return self._constructor(new_values, index=self.index, copy=False).__finalize__(
43734377
self, method="map"
43744378
)
43754379

@@ -4663,7 +4667,7 @@ def _reindex_indexer(
46634667
new_values = algorithms.take_nd(
46644668
self._values, indexer, allow_fill=True, fill_value=None
46654669
)
4666-
return self._constructor(new_values, index=new_index)
4670+
return self._constructor(new_values, index=new_index, copy=False)
46674671

46684672
def _needs_reindex_multi(self, axes, method, level) -> bool:
46694673
"""
@@ -5378,7 +5382,7 @@ def isin(self, values) -> Series:
53785382
dtype: bool
53795383
"""
53805384
result = algorithms.isin(self._values, values)
5381-
return self._constructor(result, index=self.index).__finalize__(
5385+
return self._constructor(result, index=self.index, copy=False).__finalize__(
53825386
self, method="isin"
53835387
)
53845388

0 commit comments

Comments
 (0)