Skip to content

Commit a9b61a9

Browse files
jorisvandenbosschejreback
authored andcommitted
API: DataFrame.take always returns a copy (#30784)
1 parent e61f47a commit a9b61a9

File tree

8 files changed

+81
-34
lines changed

8 files changed

+81
-34
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,7 @@ Deprecations
752752
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
753753
- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
754754
- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
755-
- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
755+
- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
756756
- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`)
757757
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
758758
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`)

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2805,7 +2805,7 @@ def __getitem__(self, key):
28052805
if getattr(indexer, "dtype", None) == bool:
28062806
indexer = np.where(indexer)[0]
28072807

2808-
data = self.take(indexer, axis=1)
2808+
data = self._take_with_is_copy(indexer, axis=1)
28092809

28102810
if is_single_key:
28112811
# What does looking for a single key in a non-unique index return?
@@ -2838,7 +2838,7 @@ def _getitem_bool_array(self, key):
28382838
# be reindexed to match DataFrame rows
28392839
key = check_bool_indexer(self.index, key)
28402840
indexer = key.nonzero()[0]
2841-
return self.take(indexer, axis=0)
2841+
return self._take_with_is_copy(indexer, axis=0)
28422842

28432843
def _getitem_multilevel(self, key):
28442844
# self.columns is a MultiIndex

pandas/core/generic.py

+26-17
Original file line numberDiff line numberDiff line change
@@ -3211,8 +3211,11 @@ def take(
32113211
axis : {0 or 'index', 1 or 'columns', None}, default 0
32123212
The axis on which to select elements. ``0`` means that we are
32133213
selecting rows, ``1`` means that we are selecting columns.
3214-
is_copy : bool, default True
3215-
Whether to return a copy of the original object or not.
3214+
is_copy : bool
3215+
Before pandas 1.0, ``is_copy=False`` can be specified to ensure
3216+
that the return value is an actual copy. Starting with pandas 1.0,
3217+
``take`` always returns a copy, and the keyword is therefore
3218+
deprecated.
32163219
32173220
.. deprecated:: 1.0.0
32183221
**kwargs
@@ -3276,12 +3279,10 @@ class max_speed
32763279
if is_copy is not None:
32773280
warnings.warn(
32783281
"is_copy is deprecated and will be removed in a future version. "
3279-
"take will always return a copy in the future.",
3282+
"'take' always returns a copy, so there is no need to specify this.",
32803283
FutureWarning,
32813284
stacklevel=2,
32823285
)
3283-
else:
3284-
is_copy = True
32853286

32863287
nv.validate_take(tuple(), kwargs)
32873288

@@ -3290,13 +3291,22 @@ class max_speed
32903291
new_data = self._data.take(
32913292
indices, axis=self._get_block_manager_axis(axis), verify=True
32923293
)
3293-
result = self._constructor(new_data).__finalize__(self)
3294+
return self._constructor(new_data).__finalize__(self)
32943295

3295-
# Maybe set copy if we didn't actually change the index.
3296-
if is_copy:
3297-
if not result._get_axis(axis).equals(self._get_axis(axis)):
3298-
result._set_is_copy(self)
3296+
def _take_with_is_copy(
3297+
self: FrameOrSeries, indices, axis=0, **kwargs
3298+
) -> FrameOrSeries:
3299+
"""
3300+
Internal version of the `take` method that sets the `_is_copy`
3301+
attribute to keep track of the parent dataframe (using in indexing
3302+
for the SettingWithCopyWarning).
32993303
3304+
See the docstring of `take` for full explanation of the parameters.
3305+
"""
3306+
result = self.take(indices=indices, axis=axis, **kwargs)
3307+
# Maybe set copy if we didn't actually change the index.
3308+
if not result._get_axis(axis).equals(self._get_axis(axis)):
3309+
result._set_is_copy(self)
33003310
return result
33013311

33023312
def xs(self, key, axis=0, level=None, drop_level: bool_t = True):
@@ -3426,9 +3436,9 @@ class animal locomotion
34263436
if isinstance(loc, np.ndarray):
34273437
if loc.dtype == np.bool_:
34283438
(inds,) = loc.nonzero()
3429-
return self.take(inds, axis=axis)
3439+
return self._take_with_is_copy(inds, axis=axis)
34303440
else:
3431-
return self.take(loc, axis=axis)
3441+
return self._take_with_is_copy(loc, axis=axis)
34323442

34333443
if not is_scalar(loc):
34343444
new_index = self.index[loc]
@@ -3485,7 +3495,7 @@ def _iget_item_cache(self, item):
34853495
if ax.is_unique:
34863496
lower = self._get_item_cache(ax[item])
34873497
else:
3488-
lower = self.take(item, axis=self._info_axis_number)
3498+
lower = self._take_with_is_copy(item, axis=self._info_axis_number)
34893499
return lower
34903500

34913501
def _box_item_values(self, key, values):
@@ -7003,8 +7013,7 @@ def asof(self, where, subset=None):
70037013

70047014
# mask the missing
70057015
missing = locs == -1
7006-
d = self.take(locs)
7007-
data = d.copy()
7016+
data = self.take(locs)
70087017
data.index = where
70097018
data.loc[missing] = np.nan
70107019
return data if is_list else data.iloc[-1]
@@ -7550,7 +7559,7 @@ def at_time(
75507559
except AttributeError:
75517560
raise TypeError("Index must be DatetimeIndex")
75527561

7553-
return self.take(indexer, axis=axis)
7562+
return self._take_with_is_copy(indexer, axis=axis)
75547563

75557564
def between_time(
75567565
self: FrameOrSeries,
@@ -7632,7 +7641,7 @@ def between_time(
76327641
except AttributeError:
76337642
raise TypeError("Index must be DatetimeIndex")
76347643

7635-
return self.take(indexer, axis=axis)
7644+
return self._take_with_is_copy(indexer, axis=axis)
76367645

76377646
def resample(
76387647
self,

pandas/core/groupby/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ def get_group(self, name, obj=None):
685685
if not len(inds):
686686
raise KeyError(name)
687687

688-
return obj.take(inds, axis=self.axis)
688+
return obj._take_with_is_copy(inds, axis=self.axis)
689689

690690
def __iter__(self):
691691
"""

pandas/core/indexing.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1509,7 +1509,7 @@ def _getitem_iterable(self, key, axis: int):
15091509
# A boolean indexer
15101510
key = check_bool_indexer(labels, key)
15111511
(inds,) = key.nonzero()
1512-
return self.obj.take(inds, axis=axis)
1512+
return self.obj._take_with_is_copy(inds, axis=axis)
15131513
else:
15141514
# A collection of keys
15151515
keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
@@ -1694,7 +1694,7 @@ def _getbool_axis(self, key, axis: int):
16941694
labels = self.obj._get_axis(axis)
16951695
key = check_bool_indexer(labels, key)
16961696
inds = key.nonzero()[0]
1697-
return self.obj.take(inds, axis=axis)
1697+
return self.obj._take_with_is_copy(inds, axis=axis)
16981698

16991699

17001700
@Appender(IndexingMixin.loc.__doc__)
@@ -2009,7 +2009,7 @@ def _get_list_axis(self, key, axis: int):
20092009
`axis` can only be zero.
20102010
"""
20112011
try:
2012-
return self.obj.take(key, axis=axis)
2012+
return self.obj._take_with_is_copy(key, axis=axis)
20132013
except IndexError:
20142014
# re-raise with different error message
20152015
raise IndexError("positional indexers are out-of-bounds")

pandas/core/series.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,14 @@ def axes(self) -> List[Index]:
786786
# Indexing Methods
787787

788788
@Appender(generic.NDFrame.take.__doc__)
789-
def take(self, indices, axis=0, is_copy=False, **kwargs) -> "Series":
789+
def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
790+
if is_copy is not None:
791+
warnings.warn(
792+
"is_copy is deprecated and will be removed in a future version. "
793+
"'take' always returns a copy, so there is no need to specify this.",
794+
FutureWarning,
795+
stacklevel=2,
796+
)
790797
nv.validate_take(tuple(), kwargs)
791798

792799
indices = ensure_platform_int(indices)
@@ -801,16 +808,20 @@ def take(self, indices, axis=0, is_copy=False, **kwargs) -> "Series":
801808
kwargs = {}
802809
new_values = self._values.take(indices, **kwargs)
803810

804-
result = self._constructor(
811+
return self._constructor(
805812
new_values, index=new_index, fastpath=True
806813
).__finalize__(self)
807814

808-
# Maybe set copy if we didn't actually change the index.
809-
if is_copy:
810-
if not result._get_axis(axis).equals(self._get_axis(axis)):
811-
result._set_is_copy(self)
815+
def _take_with_is_copy(self, indices, axis=0, **kwargs):
816+
"""
817+
Internal version of the `take` method that sets the `_is_copy`
818+
attribute to keep track of the parent dataframe (using in indexing
819+
for the SettingWithCopyWarning). For Series this does the same
820+
as the public take (it never sets `_is_copy`).
812821
813-
return result
822+
See the docstring of `take` for full explanation of the parameters.
823+
"""
824+
return self.take(indices=indices, axis=axis, **kwargs)
814825

815826
def _ixs(self, i: int, axis: int = 0):
816827
"""

pandas/tests/frame/methods/test_asof.py

+13
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,16 @@ def test_time_zone_aware_index(self, stamp, expected):
143143

144144
result = df.asof(stamp)
145145
tm.assert_series_equal(result, expected)
146+
147+
def test_is_copy(self, date_range_frame):
148+
# GH-27357, GH-30784: ensure the result of asof is an actual copy and
149+
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
150+
df = date_range_frame
151+
N = 50
152+
df.loc[15:30, "A"] = np.nan
153+
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
154+
155+
result = df.asof(dates)
156+
157+
with tm.assert_produces_warning(None):
158+
result["C"] = 1

pandas/tests/generic/test_generic.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,15 @@ def test_sample_upsampling_without_replacement(self):
446446
with pytest.raises(ValueError, match=msg):
447447
df.sample(frac=2, replace=False)
448448

449+
def test_sample_is_copy(self):
450+
# GH-27357, GH-30784: ensure the result of sample is an actual copy and
451+
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
452+
df = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
453+
df2 = df.sample(3)
454+
455+
with tm.assert_produces_warning(None):
456+
df2["d"] = 1
457+
449458
def test_size_compat(self):
450459
# GH8846
451460
# size property should be defined
@@ -817,18 +826,23 @@ def test_take_invalid_kwargs(self):
817826
with pytest.raises(ValueError, match=msg):
818827
obj.take(indices, mode="clip")
819828

820-
def test_depr_take_kwarg_is_copy(self):
829+
@pytest.mark.parametrize("is_copy", [True, False])
830+
def test_depr_take_kwarg_is_copy(self, is_copy):
821831
# GH 27357
822832
df = DataFrame({"A": [1, 2, 3]})
823833
msg = (
824834
"is_copy is deprecated and will be removed in a future version. "
825-
"take will always return a copy in the future."
835+
"'take' always returns a copy, so there is no need to specify this."
826836
)
827837
with tm.assert_produces_warning(FutureWarning) as w:
828-
df.take([0, 1], is_copy=True)
838+
df.take([0, 1], is_copy=is_copy)
829839

830840
assert w[0].message.args[0] == msg
831841

842+
s = Series([1, 2, 3])
843+
with tm.assert_produces_warning(FutureWarning):
844+
s.take([0, 1], is_copy=is_copy)
845+
832846
def test_equals(self):
833847
s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
834848
s2 = s1.copy()

0 commit comments

Comments
 (0)