Skip to content

Commit 3f05c4f

Browse files
authored
CoW: Deprecate copy keyword from first set of methods (#57347)
* CoW: Remove a few copy=False statements * CoW: Deprecate copy keyword from first set of methods * Fixup * Update * Update * Update
1 parent 87dd2ee commit 3f05c4f

23 files changed

+176
-88
lines changed

doc/source/whatsnew/v3.0.0.rst

+23
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,29 @@ Other API changes
102102

103103
Deprecations
104104
~~~~~~~~~~~~
105+
106+
Copy keyword
107+
^^^^^^^^^^^^
108+
109+
The ``copy`` keyword argument in the following methods is deprecated and
110+
will be removed in a future version:
111+
112+
- :meth:`DataFrame.truncate` / :meth:`Series.truncate`
113+
- :meth:`DataFrame.tz_convert` / :meth:`Series.tz_convert`
114+
- :meth:`DataFrame.tz_localize` / :meth:`Series.tz_localize`
115+
- :meth:`DataFrame.infer_objects` / :meth:`Series.infer_objects`
116+
- :meth:`DataFrame.align` / :meth:`Series.align`
117+
- :meth:`DataFrame.astype` / :meth:`Series.astype`
118+
- :meth:`DataFrame.reindex` / :meth:`Series.reindex`
119+
- :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like`
120+
121+
Copy-on-Write utilizes a lazy copy mechanism that defers copying the data until
122+
necessary. Use ``.copy()`` to trigger an eager copy. The ``copy`` keyword has no effect
123+
starting with 3.0, so it can be safely removed from your code.
124+
125+
Other Deprecations
126+
^^^^^^^^^^^^^^^^^^
127+
105128
- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
106129
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
107130
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5070,7 +5070,7 @@ def reindex(
50705070
columns=None,
50715071
axis: Axis | None = None,
50725072
method: ReindexMethod | None = None,
5073-
copy: bool | None = None,
5073+
copy: bool | lib.NoDefault = lib.no_default,
50745074
level: Level | None = None,
50755075
fill_value: Scalar | None = np.nan,
50765076
limit: int | None = None,
@@ -5086,6 +5086,7 @@ def reindex(
50865086
fill_value=fill_value,
50875087
limit=limit,
50885088
tolerance=tolerance,
5089+
copy=copy,
50895090
)
50905091

50915092
@overload

pandas/core/generic.py

+62-17
Original file line numberDiff line numberDiff line change
@@ -4251,12 +4251,24 @@ def _is_view(self) -> bool:
42514251
"""Return boolean indicating if self is view of another array"""
42524252
return self._mgr.is_view
42534253

4254+
@staticmethod
4255+
def _check_copy_deprecation(copy):
4256+
if copy is not lib.no_default:
4257+
warnings.warn(
4258+
"The copy keyword is deprecated and will be removed in a future "
4259+
"version. Copy-on-Write is active in pandas since 3.0 which utilizes "
4260+
"a lazy copy mechanism that defers copies until necessary. Use "
4261+
".copy() to make an eager copy if necessary.",
4262+
DeprecationWarning,
4263+
stacklevel=find_stack_level(),
4264+
)
4265+
42544266
@final
42554267
def reindex_like(
42564268
self,
42574269
other,
42584270
method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None,
4259-
copy: bool | None = None,
4271+
copy: bool | lib.NoDefault = lib.no_default,
42604272
limit: int | None = None,
42614273
tolerance=None,
42624274
) -> Self:
@@ -4284,7 +4296,7 @@ def reindex_like(
42844296
* backfill / bfill: use next valid observation to fill gap
42854297
* nearest: use nearest valid observations to fill gap.
42864298
4287-
copy : bool, default True
4299+
copy : bool, default False
42884300
Return a new object, even if the passed indexes are the same.
42894301
42904302
.. note::
@@ -4298,6 +4310,8 @@ def reindex_like(
42984310
42994311
You can already get the future behavior and improvements through
43004312
enabling copy on write ``pd.options.mode.copy_on_write = True``
4313+
4314+
.. deprecated:: 3.0.0
43014315
limit : int, default None
43024316
Maximum number of consecutive labels to fill for inexact matches.
43034317
tolerance : optional
@@ -4366,6 +4380,7 @@ def reindex_like(
43664380
2014-02-14 NaN NaN NaN
43674381
2014-02-15 35.1 NaN medium
43684382
"""
4383+
self._check_copy_deprecation(copy)
43694384
d = other._construct_axes_dict(
43704385
axes=self._AXIS_ORDERS,
43714386
method=method,
@@ -5011,7 +5026,7 @@ def reindex(
50115026
columns=None,
50125027
axis: Axis | None = None,
50135028
method: ReindexMethod | None = None,
5014-
copy: bool | None = None,
5029+
copy: bool | lib.NoDefault = lib.no_default,
50155030
level: Level | None = None,
50165031
fill_value: Scalar | None = np.nan,
50175032
limit: int | None = None,
@@ -5038,7 +5053,7 @@ def reindex(
50385053
* backfill / bfill: Use next valid observation to fill gap.
50395054
* nearest: Use nearest valid observations to fill gap.
50405055
5041-
copy : bool, default True
5056+
copy : bool, default False
50425057
Return a new object, even if the passed indexes are the same.
50435058
50445059
.. note::
@@ -5052,6 +5067,8 @@ def reindex(
50525067
50535068
You can already get the future behavior and improvements through
50545069
enabling copy on write ``pd.options.mode.copy_on_write = True``
5070+
5071+
.. deprecated:: 3.0.0
50555072
level : int or name
50565073
Broadcast across a level, matching Index values on the
50575074
passed MultiIndex level.
@@ -5229,6 +5246,7 @@ def reindex(
52295246
"""
52305247
# TODO: Decide if we care about having different examples for different
52315248
# kinds
5249+
self._check_copy_deprecation(copy)
52325250

52335251
if index is not None and columns is not None and labels is not None:
52345252
raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.")
@@ -6136,7 +6154,10 @@ def dtypes(self):
61366154

61376155
@final
61386156
def astype(
6139-
self, dtype, copy: bool | None = None, errors: IgnoreRaise = "raise"
6157+
self,
6158+
dtype,
6159+
copy: bool | lib.NoDefault = lib.no_default,
6160+
errors: IgnoreRaise = "raise",
61406161
) -> Self:
61416162
"""
61426163
Cast a pandas object to a specified dtype ``dtype``.
@@ -6149,7 +6170,7 @@ def astype(
61496170
mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is
61506171
a numpy.dtype or Python type to cast one or more of the DataFrame's
61516172
columns to column-specific types.
6152-
copy : bool, default True
6173+
copy : bool, default False
61536174
Return a copy when ``copy=True`` (be very careful setting
61546175
``copy=False`` as changes to values then may propagate to other
61556176
pandas objects).
@@ -6165,6 +6186,8 @@ def astype(
61656186
61666187
You can already get the future behavior and improvements through
61676188
enabling copy on write ``pd.options.mode.copy_on_write = True``
6189+
6190+
.. deprecated:: 3.0.0
61686191
errors : {'raise', 'ignore'}, default 'raise'
61696192
Control raising of exceptions on invalid data for provided dtype.
61706193
@@ -6254,6 +6277,7 @@ def astype(
62546277
2 2020-01-03
62556278
dtype: datetime64[ns]
62566279
"""
6280+
self._check_copy_deprecation(copy)
62576281
if is_dict_like(dtype):
62586282
if self.ndim == 1: # i.e. Series
62596283
if len(dtype) > 1 or self.name not in dtype:
@@ -6481,7 +6505,7 @@ def __deepcopy__(self, memo=None) -> Self:
64816505
return self.copy(deep=True)
64826506

64836507
@final
6484-
def infer_objects(self, copy: bool | None = None) -> Self:
6508+
def infer_objects(self, copy: bool | lib.NoDefault = lib.no_default) -> Self:
64856509
"""
64866510
Attempt to infer better dtypes for object columns.
64876511
@@ -6492,7 +6516,7 @@ def infer_objects(self, copy: bool | None = None) -> Self:
64926516
64936517
Parameters
64946518
----------
6495-
copy : bool, default True
6519+
copy : bool, default False
64966520
Whether to make a copy for non-object or non-inferable columns
64976521
or Series.
64986522
@@ -6508,6 +6532,8 @@ def infer_objects(self, copy: bool | None = None) -> Self:
65086532
You can already get the future behavior and improvements through
65096533
enabling copy on write ``pd.options.mode.copy_on_write = True``
65106534
6535+
.. deprecated:: 3.0.0
6536+
65116537
Returns
65126538
-------
65136539
same type as input object
@@ -6537,6 +6563,7 @@ def infer_objects(self, copy: bool | None = None) -> Self:
65376563
A int64
65386564
dtype: object
65396565
"""
6566+
self._check_copy_deprecation(copy)
65406567
new_mgr = self._mgr.convert()
65416568
res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
65426569
return res.__finalize__(self, method="infer_objects")
@@ -9404,7 +9431,7 @@ def align(
94049431
join: AlignJoin = "outer",
94059432
axis: Axis | None = None,
94069433
level: Level | None = None,
9407-
copy: bool | None = None,
9434+
copy: bool | lib.NoDefault = lib.no_default,
94089435
fill_value: Hashable | None = None,
94099436
) -> tuple[Self, NDFrameT]:
94109437
"""
@@ -9429,7 +9456,7 @@ def align(
94299456
level : int or level name, default None
94309457
Broadcast across a level, matching Index values on the
94319458
passed MultiIndex level.
9432-
copy : bool, default True
9459+
copy : bool, default False
94339460
Always returns new objects. If copy=False and no reindexing is
94349461
required then original objects are returned.
94359462
@@ -9444,6 +9471,8 @@ def align(
94449471
94459472
You can already get the future behavior and improvements through
94469473
enabling copy on write ``pd.options.mode.copy_on_write = True``
9474+
9475+
.. deprecated:: 3.0.0
94479476
fill_value : scalar, default np.nan
94489477
Value to use for missing values. Defaults to NaN, but can be any
94499478
"compatible" value.
@@ -9518,6 +9547,8 @@ def align(
95189547
3 60.0 70.0 80.0 90.0 NaN
95199548
4 600.0 700.0 800.0 900.0 NaN
95209549
"""
9550+
self._check_copy_deprecation(copy)
9551+
95219552
_right: DataFrame | Series
95229553
if axis is not None:
95239554
axis = self._get_axis_number(axis)
@@ -10336,7 +10367,7 @@ def truncate(
1033610367
before=None,
1033710368
after=None,
1033810369
axis: Axis | None = None,
10339-
copy: bool | None = None,
10370+
copy: bool | lib.NoDefault = lib.no_default,
1034010371
) -> Self:
1034110372
"""
1034210373
Truncate a Series or DataFrame before and after some index value.
@@ -10353,7 +10384,7 @@ def truncate(
1035310384
axis : {0 or 'index', 1 or 'columns'}, optional
1035410385
Axis to truncate. Truncates the index (rows) by default.
1035510386
For `Series` this parameter is unused and defaults to 0.
10356-
copy : bool, default is True,
10387+
copy : bool, default False
1035710388
Return a copy of the truncated section.
1035810389
1035910390
.. note::
@@ -10368,6 +10399,8 @@ def truncate(
1036810399
You can already get the future behavior and improvements through
1036910400
enabling copy on write ``pd.options.mode.copy_on_write = True``
1037010401
10402+
.. deprecated:: 3.0.0
10403+
1037110404
Returns
1037210405
-------
1037310406
type of caller
@@ -10473,6 +10506,8 @@ def truncate(
1047310506
2016-01-10 23:59:58 1
1047410507
2016-01-10 23:59:59 1
1047510508
"""
10509+
self._check_copy_deprecation(copy)
10510+
1047610511
if axis is None:
1047710512
axis = 0
1047810513
axis = self._get_axis_number(axis)
@@ -10511,7 +10546,11 @@ def truncate(
1051110546
@final
1051210547
@doc(klass=_shared_doc_kwargs["klass"])
1051310548
def tz_convert(
10514-
self, tz, axis: Axis = 0, level=None, copy: bool | None = None
10549+
self,
10550+
tz,
10551+
axis: Axis = 0,
10552+
level=None,
10553+
copy: bool | lib.NoDefault = lib.no_default,
1051510554
) -> Self:
1051610555
"""
1051710556
Convert tz-aware axis to target time zone.
@@ -10526,7 +10565,7 @@ def tz_convert(
1052610565
level : int, str, default None
1052710566
If axis is a MultiIndex, convert a specific level. Otherwise
1052810567
must be None.
10529-
copy : bool, default True
10568+
copy : bool, default False
1053010569
Also make a copy of the underlying data.
1053110570
1053210571
.. note::
@@ -10541,6 +10580,8 @@ def tz_convert(
1054110580
You can already get the future behavior and improvements through
1054210581
enabling copy on write ``pd.options.mode.copy_on_write = True``
1054310582
10583+
.. deprecated:: 3.0.0
10584+
1054410585
Returns
1054510586
-------
1054610587
{klass}
@@ -10570,6 +10611,7 @@ def tz_convert(
1057010611
2018-09-14 23:30:00 1
1057110612
dtype: int64
1057210613
"""
10614+
self._check_copy_deprecation(copy)
1057310615
axis = self._get_axis_number(axis)
1057410616
ax = self._get_axis(axis)
1057510617

@@ -10607,7 +10649,7 @@ def tz_localize(
1060710649
tz,
1060810650
axis: Axis = 0,
1060910651
level=None,
10610-
copy: bool | None = None,
10652+
copy: bool | lib.NoDefault = lib.no_default,
1061110653
ambiguous: TimeAmbiguous = "raise",
1061210654
nonexistent: TimeNonexistent = "raise",
1061310655
) -> Self:
@@ -10627,7 +10669,7 @@ def tz_localize(
1062710669
level : int, str, default None
1062810670
If axis is a MultiIndex, localize a specific level. Otherwise
1062910671
must be None.
10630-
copy : bool, default True
10672+
copy : bool, default False
1063110673
Also make a copy of the underlying data.
1063210674
1063310675
.. note::
@@ -10641,6 +10683,8 @@ def tz_localize(
1064110683
1064210684
You can already get the future behavior and improvements through
1064310685
enabling copy on write ``pd.options.mode.copy_on_write = True``
10686+
10687+
.. deprecated:: 3.0.0
1064410688
ambiguous : 'infer', bool, bool-ndarray, 'NaT', default 'raise'
1064510689
When clocks moved backward due to DST, ambiguous times may arise.
1064610690
For example in Central European Time (UTC+01), when going from
@@ -10766,6 +10810,7 @@ def tz_localize(
1076610810
2015-03-29 03:30:00+02:00 1
1076710811
dtype: int64
1076810812
"""
10813+
self._check_copy_deprecation(copy)
1076910814
nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
1077010815
if nonexistent not in nonexistent_options and not isinstance(
1077110816
nonexistent, dt.timedelta
@@ -11720,7 +11765,7 @@ def _inplace_method(self, other, op) -> Self:
1172011765

1172111766
# this makes sure that we are aligned like the input
1172211767
# we are updating inplace
11723-
self._update_inplace(result.reindex_like(self, copy=False))
11768+
self._update_inplace(result.reindex_like(self))
1172411769
return self
1172511770

1172611771
@final

pandas/core/reshape/encoding.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ def from_dummies(
499499

500500
# index data with a list of all columns that are dummies
501501
try:
502-
data_to_decode = data.astype("boolean", copy=False)
502+
data_to_decode = data.astype("boolean")
503503
except TypeError as err:
504504
raise TypeError("Passed DataFrame contains non-dummy data") from err
505505

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4844,7 +4844,7 @@ def reindex( # type: ignore[override]
48444844
*,
48454845
axis: Axis | None = None,
48464846
method: ReindexMethod | None = None,
4847-
copy: bool | None = None,
4847+
copy: bool | lib.NoDefault = lib.no_default,
48484848
level: Level | None = None,
48494849
fill_value: Scalar | None = None,
48504850
limit: int | None = None,
@@ -4857,6 +4857,7 @@ def reindex( # type: ignore[override]
48574857
fill_value=fill_value,
48584858
limit=limit,
48594859
tolerance=tolerance,
4860+
copy=copy,
48604861
)
48614862

48624863
@overload # type: ignore[override]

pandas/core/tools/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1148,7 +1148,7 @@ def coerce(values):
11481148

11491149
# prevent overflow in case of int8 or int16
11501150
if is_integer_dtype(values.dtype):
1151-
values = values.astype("int64", copy=False)
1151+
values = values.astype("int64")
11521152
return values
11531153

11541154
values = (

0 commit comments

Comments
 (0)