Skip to content

Commit e572098

Browse files
jorisvandenbosschejreback
authored andcommitted
Backport PR #30784: API: DataFrame.take always returns a copy (#31362)
1 parent ada79ce commit e572098

File tree

8 files changed

+82
-35
lines changed

8 files changed

+82
-35
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ Deprecations
753753
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
754754
- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
755755
- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
756-
- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
756+
- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
757757
- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`)
758758
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
759759
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`)

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2809,7 +2809,7 @@ def __getitem__(self, key):
28092809
if getattr(indexer, "dtype", None) == bool:
28102810
indexer = np.where(indexer)[0]
28112811

2812-
data = self.take(indexer, axis=1)
2812+
data = self._take_with_is_copy(indexer, axis=1)
28132813

28142814
if is_single_key:
28152815
# What does looking for a single key in a non-unique index return?
@@ -2842,7 +2842,7 @@ def _getitem_bool_array(self, key):
28422842
# be reindexed to match DataFrame rows
28432843
key = check_bool_indexer(self.index, key)
28442844
indexer = key.nonzero()[0]
2845-
return self.take(indexer, axis=0)
2845+
return self._take_with_is_copy(indexer, axis=0)
28462846

28472847
def _getitem_multilevel(self, key):
28482848
# self.columns is a MultiIndex

pandas/core/generic.py

+26-17
Original file line numberDiff line numberDiff line change
@@ -3313,8 +3313,11 @@ def take(
33133313
axis : {0 or 'index', 1 or 'columns', None}, default 0
33143314
The axis on which to select elements. ``0`` means that we are
33153315
selecting rows, ``1`` means that we are selecting columns.
3316-
is_copy : bool, default True
3317-
Whether to return a copy of the original object or not.
3316+
is_copy : bool
3317+
Before pandas 1.0, ``is_copy=False`` can be specified to ensure
3318+
that the return value is an actual copy. Starting with pandas 1.0,
3319+
``take`` always returns a copy, and the keyword is therefore
3320+
deprecated.
33183321
33193322
.. deprecated:: 1.0.0
33203323
**kwargs
@@ -3378,12 +3381,10 @@ class max_speed
33783381
if is_copy is not None:
33793382
warnings.warn(
33803383
"is_copy is deprecated and will be removed in a future version. "
3381-
"take will always return a copy in the future.",
3384+
"'take' always returns a copy, so there is no need to specify this.",
33823385
FutureWarning,
33833386
stacklevel=2,
33843387
)
3385-
else:
3386-
is_copy = True
33873388

33883389
nv.validate_take(tuple(), kwargs)
33893390

@@ -3392,13 +3393,22 @@ class max_speed
33923393
new_data = self._data.take(
33933394
indices, axis=self._get_block_manager_axis(axis), verify=True
33943395
)
3395-
result = self._constructor(new_data).__finalize__(self)
3396+
return self._constructor(new_data).__finalize__(self)
33963397

3397-
# Maybe set copy if we didn't actually change the index.
3398-
if is_copy:
3399-
if not result._get_axis(axis).equals(self._get_axis(axis)):
3400-
result._set_is_copy(self)
3398+
def _take_with_is_copy(
3399+
self: FrameOrSeries, indices, axis=0, **kwargs
3400+
) -> FrameOrSeries:
3401+
"""
3402+
Internal version of the `take` method that sets the `_is_copy`
3403+
attribute to keep track of the parent dataframe (using in indexing
3404+
for the SettingWithCopyWarning).
34013405
3406+
See the docstring of `take` for full explanation of the parameters.
3407+
"""
3408+
result = self.take(indices=indices, axis=axis, **kwargs)
3409+
# Maybe set copy if we didn't actually change the index.
3410+
if not result._get_axis(axis).equals(self._get_axis(axis)):
3411+
result._set_is_copy(self)
34023412
return result
34033413

34043414
def xs(self, key, axis=0, level=None, drop_level: bool_t = True):
@@ -3528,9 +3538,9 @@ class animal locomotion
35283538
if isinstance(loc, np.ndarray):
35293539
if loc.dtype == np.bool_:
35303540
(inds,) = loc.nonzero()
3531-
return self.take(inds, axis=axis)
3541+
return self._take_with_is_copy(inds, axis=axis)
35323542
else:
3533-
return self.take(loc, axis=axis)
3543+
return self._take_with_is_copy(loc, axis=axis)
35343544

35353545
if not is_scalar(loc):
35363546
new_index = self.index[loc]
@@ -3587,7 +3597,7 @@ def _iget_item_cache(self, item):
35873597
if ax.is_unique:
35883598
lower = self._get_item_cache(ax[item])
35893599
else:
3590-
lower = self.take(item, axis=self._info_axis_number)
3600+
lower = self._take_with_is_copy(item, axis=self._info_axis_number)
35913601
return lower
35923602

35933603
def _box_item_values(self, key, values):
@@ -7180,8 +7190,7 @@ def asof(self, where, subset=None):
71807190

71817191
# mask the missing
71827192
missing = locs == -1
7183-
d = self.take(locs)
7184-
data = d.copy()
7193+
data = self.take(locs)
71857194
data.index = where
71867195
data.loc[missing] = np.nan
71877196
return data if is_list else data.iloc[-1]
@@ -7727,7 +7736,7 @@ def at_time(
77277736
except AttributeError:
77287737
raise TypeError("Index must be DatetimeIndex")
77297738

7730-
return self.take(indexer, axis=axis)
7739+
return self._take_with_is_copy(indexer, axis=axis)
77317740

77327741
def between_time(
77337742
self: FrameOrSeries,
@@ -7809,7 +7818,7 @@ def between_time(
78097818
except AttributeError:
78107819
raise TypeError("Index must be DatetimeIndex")
78117820

7812-
return self.take(indexer, axis=axis)
7821+
return self._take_with_is_copy(indexer, axis=axis)
78137822

78147823
def resample(
78157824
self,

pandas/core/groupby/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ def get_group(self, name, obj=None):
685685
if not len(inds):
686686
raise KeyError(name)
687687

688-
return obj.take(inds, axis=self.axis)
688+
return obj._take_with_is_copy(inds, axis=self.axis)
689689

690690
def __iter__(self):
691691
"""

pandas/core/indexing.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1588,7 +1588,7 @@ def _getitem_iterable(self, key, axis: int):
15881588
# A boolean indexer
15891589
key = check_bool_indexer(labels, key)
15901590
(inds,) = key.nonzero()
1591-
return self.obj.take(inds, axis=axis)
1591+
return self.obj._take_with_is_copy(inds, axis=axis)
15921592
else:
15931593
# A collection of keys
15941594
keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
@@ -1780,7 +1780,7 @@ def _getbool_axis(self, key, axis: int):
17801780
labels = self.obj._get_axis(axis)
17811781
key = check_bool_indexer(labels, key)
17821782
inds = key.nonzero()[0]
1783-
return self.obj.take(inds, axis=axis)
1783+
return self.obj._take_with_is_copy(inds, axis=axis)
17841784

17851785
def _get_slice_axis(self, slice_obj: slice, axis: int):
17861786
"""
@@ -1801,7 +1801,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int):
18011801
else:
18021802
# DatetimeIndex overrides Index.slice_indexer and may
18031803
# return a DatetimeIndex instead of a slice object.
1804-
return self.obj.take(indexer, axis=axis)
1804+
return self.obj._take_with_is_copy(indexer, axis=axis)
18051805

18061806

18071807
@Appender(IndexingMixin.loc.__doc__)
@@ -2107,7 +2107,7 @@ def _get_list_axis(self, key, axis: int):
21072107
`axis` can only be zero.
21082108
"""
21092109
try:
2110-
return self.obj.take(key, axis=axis)
2110+
return self.obj._take_with_is_copy(key, axis=axis)
21112111
except IndexError:
21122112
# re-raise with different error message
21132113
raise IndexError("positional indexers are out-of-bounds")

pandas/core/series.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,14 @@ def axes(self):
804804
# Indexing Methods
805805

806806
@Appender(generic.NDFrame.take.__doc__)
807-
def take(self, indices, axis=0, is_copy=False, **kwargs):
807+
def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
808+
if is_copy is not None:
809+
warnings.warn(
810+
"is_copy is deprecated and will be removed in a future version. "
811+
"'take' always returns a copy, so there is no need to specify this.",
812+
FutureWarning,
813+
stacklevel=2,
814+
)
808815
nv.validate_take(tuple(), kwargs)
809816

810817
indices = ensure_platform_int(indices)
@@ -819,16 +826,20 @@ def take(self, indices, axis=0, is_copy=False, **kwargs):
819826
kwargs = {}
820827
new_values = self._values.take(indices, **kwargs)
821828

822-
result = self._constructor(
829+
return self._constructor(
823830
new_values, index=new_index, fastpath=True
824831
).__finalize__(self)
825832

826-
# Maybe set copy if we didn't actually change the index.
827-
if is_copy:
828-
if not result._get_axis(axis).equals(self._get_axis(axis)):
829-
result._set_is_copy(self)
833+
def _take_with_is_copy(self, indices, axis=0, **kwargs):
834+
"""
835+
Internal version of the `take` method that sets the `_is_copy`
836+
attribute to keep track of the parent dataframe (using in indexing
837+
for the SettingWithCopyWarning). For Series this does the same
838+
as the public take (it never sets `_is_copy`).
830839
831-
return result
840+
See the docstring of `take` for full explanation of the parameters.
841+
"""
842+
return self.take(indices=indices, axis=axis, **kwargs)
832843

833844
def _ixs(self, i: int, axis: int = 0):
834845
"""

pandas/tests/frame/methods/test_asof.py

+13
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,16 @@ def test_time_zone_aware_index(self, stamp, expected):
143143

144144
result = df.asof(stamp)
145145
tm.assert_series_equal(result, expected)
146+
147+
def test_is_copy(self, date_range_frame):
148+
# GH-27357, GH-30784: ensure the result of asof is an actual copy and
149+
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
150+
df = date_range_frame
151+
N = 50
152+
df.loc[15:30, "A"] = np.nan
153+
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
154+
155+
result = df.asof(dates)
156+
157+
with tm.assert_produces_warning(None):
158+
result["C"] = 1

pandas/tests/generic/test_generic.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,15 @@ def test_sample_upsampling_without_replacement(self):
446446
with pytest.raises(ValueError, match=msg):
447447
df.sample(frac=2, replace=False)
448448

449+
def test_sample_is_copy(self):
450+
# GH-27357, GH-30784: ensure the result of sample is an actual copy and
451+
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
452+
df = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
453+
df2 = df.sample(3)
454+
455+
with tm.assert_produces_warning(None):
456+
df2["d"] = 1
457+
449458
def test_size_compat(self):
450459
# GH8846
451460
# size property should be defined
@@ -820,18 +829,23 @@ def test_take_invalid_kwargs(self):
820829
with pytest.raises(ValueError, match=msg):
821830
obj.take(indices, mode="clip")
822831

823-
def test_depr_take_kwarg_is_copy(self):
832+
@pytest.mark.parametrize("is_copy", [True, False])
833+
def test_depr_take_kwarg_is_copy(self, is_copy):
824834
# GH 27357
825835
df = DataFrame({"A": [1, 2, 3]})
826836
msg = (
827837
"is_copy is deprecated and will be removed in a future version. "
828-
"take will always return a copy in the future."
838+
"'take' always returns a copy, so there is no need to specify this."
829839
)
830840
with tm.assert_produces_warning(FutureWarning) as w:
831-
df.take([0, 1], is_copy=True)
841+
df.take([0, 1], is_copy=is_copy)
832842

833843
assert w[0].message.args[0] == msg
834844

845+
s = Series([1, 2, 3])
846+
with tm.assert_produces_warning(FutureWarning):
847+
s.take([0, 1], is_copy=is_copy)
848+
835849
def test_equals(self):
836850
s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
837851
s2 = s1.copy()

0 commit comments

Comments
 (0)