Skip to content

Commit 9e2b9a0

Browse files
authored
CoW: Remove copy-keywords as far as possible (#57327)
* CoW: Remove copy-keywords as far as possible
* Update
1 parent 4d0068e commit 9e2b9a0

File tree

10 files changed

+85
-188
lines changed

10 files changed

+85
-188
lines changed

pandas/core/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ def _obj_with_exclusions(self):
218218
return self.obj
219219

220220
if self._selection is not None:
221-
return self.obj._getitem_nocopy(self._selection_list)
221+
return self.obj[self._selection_list]
222222

223223
if len(self.exclusions) > 0:
224224
# equivalent to `self.obj.drop(self.exclusions, axis=1)

pandas/core/frame.py

+28-59
Original file line numberDiff line numberDiff line change
@@ -1676,8 +1676,8 @@ def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series:
16761676
if len(common) > len(self.columns) or len(common) > len(other.index):
16771677
raise ValueError("matrices are not aligned")
16781678

1679-
left = self.reindex(columns=common, copy=False)
1680-
right = other.reindex(index=common, copy=False)
1679+
left = self.reindex(columns=common)
1680+
right = other.reindex(index=common)
16811681
lvals = left.values
16821682
rvals = right._values
16831683
else:
@@ -3800,27 +3800,6 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]:
38003800
for i in range(len(self.columns)):
38013801
yield self._get_column_array(i)
38023802

3803-
def _getitem_nocopy(self, key: list):
3804-
"""
3805-
Behaves like __getitem__, but returns a view in cases where __getitem__
3806-
would make a copy.
3807-
"""
3808-
# TODO(CoW): can be removed if/when we are always Copy-on-Write
3809-
indexer = self.columns._get_indexer_strict(key, "columns")[1]
3810-
new_axis = self.columns[indexer]
3811-
3812-
new_mgr = self._mgr.reindex_indexer(
3813-
new_axis,
3814-
indexer,
3815-
axis=0,
3816-
allow_dups=True,
3817-
copy=False,
3818-
only_slice=True,
3819-
)
3820-
result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
3821-
result = result.__finalize__(self)
3822-
return result
3823-
38243803
def __getitem__(self, key):
38253804
check_dict_or_set_indexers(key)
38263805
key = lib.item_from_zerodim(key)
@@ -3911,7 +3890,7 @@ def _getitem_bool_array(self, key):
39113890
key = check_bool_indexer(self.index, key)
39123891

39133892
if key.all():
3914-
return self.copy(deep=None)
3893+
return self.copy(deep=False)
39153894

39163895
indexer = key.nonzero()[0]
39173896
return self.take(indexer, axis=0)
@@ -4774,7 +4753,7 @@ def predicate(arr: ArrayLike) -> bool:
47744753

47754754
return True
47764755

4777-
mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
4756+
mgr = self._mgr._get_data_subset(predicate).copy(deep=False)
47784757
return self._constructor_from_mgr(mgr, axes=mgr.axes).__finalize__(self)
47794758

47804759
def insert(
@@ -4919,7 +4898,7 @@ def assign(self, **kwargs) -> DataFrame:
49194898
Portland 17.0 62.6 290.15
49204899
Berkeley 25.0 77.0 298.15
49214900
"""
4922-
data = self.copy(deep=None)
4901+
data = self.copy(deep=False)
49234902

49244903
for k, v in kwargs.items():
49254904
data[k] = com.apply_if_callable(v, data)
@@ -4996,7 +4975,6 @@ def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame:
49964975
else:
49974976
return self._reindex_with_indexers(
49984977
{0: [new_index, row_indexer], 1: [new_columns, col_indexer]},
4999-
copy=False,
50004978
fill_value=fill_value,
50014979
)
50024980

@@ -5038,7 +5016,7 @@ def set_axis(
50385016
axis: Axis = 0,
50395017
copy: bool | None = None,
50405018
) -> DataFrame:
5041-
return super().set_axis(labels, axis=axis, copy=copy)
5019+
return super().set_axis(labels, axis=axis)
50425020

50435021
@doc(
50445022
NDFrame.reindex,
@@ -5065,7 +5043,6 @@ def reindex(
50655043
columns=columns,
50665044
axis=axis,
50675045
method=method,
5068-
copy=copy,
50695046
level=level,
50705047
fill_value=fill_value,
50715048
limit=limit,
@@ -5463,7 +5440,6 @@ def rename(
54635440
index=index,
54645441
columns=columns,
54655442
axis=axis,
5466-
copy=copy,
54675443
inplace=inplace,
54685444
level=level,
54695445
errors=errors,
@@ -5534,7 +5510,7 @@ def _replace_columnwise(
55345510
DataFrame or None
55355511
"""
55365512
# Operate column-wise
5537-
res = self if inplace else self.copy(deep=None)
5513+
res = self if inplace else self.copy(deep=False)
55385514
ax = self.columns
55395515

55405516
for i, ax_value in enumerate(ax):
@@ -5823,8 +5799,7 @@ def set_index(
58235799
if inplace:
58245800
frame = self
58255801
else:
5826-
# GH 49473 Use "lazy copy" with Copy-on-Write
5827-
frame = self.copy(deep=None)
5802+
frame = self.copy(deep=False)
58285803

58295804
arrays: list[Index] = []
58305805
names: list[Hashable] = []
@@ -6114,7 +6089,7 @@ class max type
61146089
if inplace:
61156090
new_obj = self
61166091
else:
6117-
new_obj = self.copy(deep=None)
6092+
new_obj = self.copy(deep=False)
61186093
if allow_duplicates is not lib.no_default:
61196094
allow_duplicates = validate_bool_kwarg(allow_duplicates, "allow_duplicates")
61206095

@@ -6386,7 +6361,7 @@ def dropna(
63866361
raise ValueError(f"invalid how option: {how}")
63876362

63886363
if np.all(mask):
6389-
result = self.copy(deep=None)
6364+
result = self.copy(deep=False)
63906365
else:
63916366
result = self.loc(axis=axis)[mask]
63926367

@@ -6515,7 +6490,7 @@ def drop_duplicates(
65156490
4 Indomie pack 5.0
65166491
"""
65176492
if self.empty:
6518-
return self.copy(deep=None)
6493+
return self.copy(deep=False)
65196494

65206495
inplace = validate_bool_kwarg(inplace, "inplace")
65216496
ignore_index = validate_bool_kwarg(ignore_index, "ignore_index")
@@ -6631,7 +6606,7 @@ def duplicated(
66316606

66326607
def f(vals) -> tuple[np.ndarray, int]:
66336608
labels, shape = algorithms.factorize(vals, size_hint=len(self))
6634-
return labels.astype("i8", copy=False), len(shape)
6609+
return labels.astype("i8"), len(shape)
66356610

66366611
if subset is None:
66376612
# https://github.com/pandas-dev/pandas/issues/28770
@@ -6914,7 +6889,7 @@ def sort_values(
69146889
if inplace:
69156890
return self._update_inplace(self)
69166891
else:
6917-
return self.copy(deep=None)
6892+
return self.copy(deep=False)
69186893

69196894
if is_range_indexer(indexer, len(indexer)):
69206895
result = self.copy(deep=False)
@@ -7570,7 +7545,7 @@ def nsmallest(
75707545
),
75717546
)
75727547
def swaplevel(self, i: Axis = -2, j: Axis = -1, axis: Axis = 0) -> DataFrame:
7573-
result = self.copy(deep=None)
7548+
result = self.copy(deep=False)
75747549

75757550
axis = self._get_axis_number(axis)
75767551

@@ -7630,7 +7605,7 @@ class diet
76307605
if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover
76317606
raise TypeError("Can only reorder levels on a hierarchical axis.")
76327607

7633-
result = self.copy(deep=None)
7608+
result = self.copy(deep=False)
76347609

76357610
if axis == 0:
76367611
assert isinstance(result.index, MultiIndex)
@@ -7933,9 +7908,7 @@ def to_series(right):
79337908
if flex is not None and isinstance(right, DataFrame):
79347909
if not left._indexed_same(right):
79357910
if flex:
7936-
left, right = left.align(
7937-
right, join="outer", level=level, copy=False
7938-
)
7911+
left, right = left.align(right, join="outer", level=level)
79397912
else:
79407913
raise ValueError(
79417914
"Can only compare identically-labeled (both index and columns) "
@@ -7948,7 +7921,7 @@ def to_series(right):
79487921
if not left.axes[axis].equals(right.index):
79497922
raise ValueError(
79507923
"Operands are not aligned. Do "
7951-
"`left, right = left.align(right, axis=1, copy=False)` "
7924+
"`left, right = left.align(right, axis=1)` "
79527925
"before operating."
79537926
)
79547927

@@ -7957,7 +7930,6 @@ def to_series(right):
79577930
join="outer",
79587931
axis=axis,
79597932
level=level,
7960-
copy=False,
79617933
)
79627934
right = left._maybe_align_series_as_frame(right, axis)
79637935

@@ -8467,7 +8439,7 @@ def combine(
84678439
"""
84688440
other_idxlen = len(other.index) # save for compare
84698441

8470-
this, other = self.align(other, copy=False)
8442+
this, other = self.align(other)
84718443
new_index = this.index
84728444

84738445
if other.empty and len(new_index) == len(self.index):
@@ -8507,15 +8479,15 @@ def combine(
85078479
# try to promote series, which is all NaN, as other_dtype.
85088480
new_dtype = other_dtype
85098481
try:
8510-
series = series.astype(new_dtype, copy=False)
8482+
series = series.astype(new_dtype)
85118483
except ValueError:
85128484
# e.g. new_dtype is integer types
85138485
pass
85148486
else:
85158487
# if we have different dtypes, possibly promote
85168488
new_dtype = find_common_type([this_dtype, other_dtype])
8517-
series = series.astype(new_dtype, copy=False)
8518-
other_series = other_series.astype(new_dtype, copy=False)
8489+
series = series.astype(new_dtype)
8490+
other_series = other_series.astype(new_dtype)
85198491

85208492
arr = func(series, other_series)
85218493
if isinstance(new_dtype, np.dtype):
@@ -9567,7 +9539,7 @@ def explode(
95679539
result.index = default_index(len(result))
95689540
else:
95699541
result.index = self.index.take(result.index)
9570-
result = result.reindex(columns=self.columns, copy=False)
9542+
result = result.reindex(columns=self.columns)
95719543

95729544
return result.__finalize__(self, method="explode")
95739545

@@ -10263,9 +10235,7 @@ def _append(
1026310235
row_df = other.to_frame().T
1026410236
# infer_objects is needed for
1026510237
# test_append_empty_frame_to_series_with_dateutil_tz
10266-
other = row_df.infer_objects(copy=False).rename_axis(
10267-
index.names, copy=False
10268-
)
10238+
other = row_df.infer_objects().rename_axis(index.names)
1026910239
elif isinstance(other, list):
1027010240
if not other:
1027110241
pass
@@ -10509,7 +10479,7 @@ def join(
1050910479
res = concat(
1051010480
frames, axis=1, join="outer", verify_integrity=True, sort=sort
1051110481
)
10512-
return res.reindex(self.index, copy=False)
10482+
return res.reindex(self.index)
1051310483
else:
1051410484
return concat(
1051510485
frames, axis=1, join=how, verify_integrity=True, sort=sort
@@ -10559,7 +10529,6 @@ def merge(
1055910529
right_index=right_index,
1056010530
sort=sort,
1056110531
suffixes=suffixes,
10562-
copy=copy,
1056310532
indicator=indicator,
1056410533
validate=validate,
1056510534
)
@@ -11024,7 +10993,7 @@ def corrwith(
1102410993

1102510994
if numeric_only:
1102610995
other = other._get_numeric_data()
11027-
left, right = this.align(other, join="inner", copy=False)
10996+
left, right = this.align(other, join="inner")
1102810997

1102910998
if axis == 1:
1103010999
left = left.T
@@ -11161,7 +11130,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False):
1116111130
else:
1116211131
result = notna(frame).sum(axis=axis)
1116311132

11164-
return result.astype("int64", copy=False).__finalize__(self, method="count")
11133+
return result.astype("int64").__finalize__(self, method="count")
1116511134

1116611135
def _reduce(
1116711136
self,
@@ -11225,7 +11194,7 @@ def _get_data() -> DataFrame:
1122511194
if axis is None:
1122611195
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
1122711196
if isinstance(dtype, ExtensionDtype):
11228-
df = df.astype(dtype, copy=False)
11197+
df = df.astype(dtype)
1122911198
arr = concat_compat(list(df._iter_column_arrays()))
1123011199
return arr._reduce(name, skipna=skipna, keepdims=False, **kwds)
1123111200
return func(df.values)
@@ -11257,7 +11226,7 @@ def _get_data() -> DataFrame:
1125711226
# be equivalent to transposing the original frame and aggregating
1125811227
# with axis=0.
1125911228
name = {"argmax": "idxmax", "argmin": "idxmin"}.get(name, name)
11260-
df = df.astype(dtype, copy=False)
11229+
df = df.astype(dtype)
1126111230
arr = concat_compat(list(df._iter_column_arrays()))
1126211231
nrows, ncols = df.shape
1126311232
row_index = np.tile(np.arange(nrows), ncols)

0 commit comments

Comments
 (0)