pandas-dev · jreback · Jan 28, 2020 · Jan 27, 2020 · Jan 27, 2020
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -753,7 +753,7 @@ Deprecations
 - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
 - The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
 - ``pandas.SparseArray`` has been deprecated.  Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
-- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
+- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
 - Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`)
 - The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
 - The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2809,7 +2809,7 @@ def __getitem__(self, key):
         if getattr(indexer, "dtype", None) == bool:
             indexer = np.where(indexer)[0]
 
-        data = self.take(indexer, axis=1)
+        data = self._take_with_is_copy(indexer, axis=1)
 
         if is_single_key:
             # What does looking for a single key in a non-unique index return?
@@ -2842,7 +2842,7 @@ def _getitem_bool_array(self, key):
         # be reindexed to match DataFrame rows
         key = check_bool_indexer(self.index, key)
         indexer = key.nonzero()[0]
-        return self.take(indexer, axis=0)
+        return self._take_with_is_copy(indexer, axis=0)
 
     def _getitem_multilevel(self, key):
         # self.columns is a MultiIndex

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3313,8 +3313,11 @@ def take(
         axis : {0 or 'index', 1 or 'columns', None}, default 0
             The axis on which to select elements. ``0`` means that we are
             selecting rows, ``1`` means that we are selecting columns.
-        is_copy : bool, default True
-            Whether to return a copy of the original object or not.
+        is_copy : bool
+            Before pandas 1.0, ``is_copy=False`` can be specified to ensure
+            that the return value is an actual copy. Starting with pandas 1.0,
+            ``take`` always returns a copy, and the keyword is therefore
+            deprecated.
 
             .. deprecated:: 1.0.0
         **kwargs
@@ -3378,12 +3381,10 @@ class  max_speed
         if is_copy is not None:
             warnings.warn(
                 "is_copy is deprecated and will be removed in a future version. "
-                "take will always return a copy in the future.",
+                "'take' always returns a copy, so there is no need to specify this.",
                 FutureWarning,
                 stacklevel=2,
             )
-        else:
-            is_copy = True
 
         nv.validate_take(tuple(), kwargs)
 
@@ -3392,13 +3393,22 @@ class  max_speed
         new_data = self._data.take(
             indices, axis=self._get_block_manager_axis(axis), verify=True
         )
-        result = self._constructor(new_data).__finalize__(self)
+        return self._constructor(new_data).__finalize__(self)
 
-        # Maybe set copy if we didn't actually change the index.
-        if is_copy:
-            if not result._get_axis(axis).equals(self._get_axis(axis)):
-                result._set_is_copy(self)
+    def _take_with_is_copy(
+        self: FrameOrSeries, indices, axis=0, **kwargs
+    ) -> FrameOrSeries:
+        """
+        Internal version of the `take` method that sets the `_is_copy`
+        attribute to keep track of the parent dataframe (using in indexing
+        for the SettingWithCopyWarning).
 
+        See the docstring of `take` for full explanation of the parameters.
+        """
+        result = self.take(indices=indices, axis=axis, **kwargs)
+        # Maybe set copy if we didn't actually change the index.
+        if not result._get_axis(axis).equals(self._get_axis(axis)):
+            result._set_is_copy(self)
         return result
 
     def xs(self, key, axis=0, level=None, drop_level: bool_t = True):
@@ -3528,9 +3538,9 @@ class   animal   locomotion
             if isinstance(loc, np.ndarray):
                 if loc.dtype == np.bool_:
                     (inds,) = loc.nonzero()
-                    return self.take(inds, axis=axis)
+                    return self._take_with_is_copy(inds, axis=axis)
                 else:
-                    return self.take(loc, axis=axis)
+                    return self._take_with_is_copy(loc, axis=axis)
 
             if not is_scalar(loc):
                 new_index = self.index[loc]
@@ -3587,7 +3597,7 @@ def _iget_item_cache(self, item):
         if ax.is_unique:
             lower = self._get_item_cache(ax[item])
         else:
-            lower = self.take(item, axis=self._info_axis_number)
+            lower = self._take_with_is_copy(item, axis=self._info_axis_number)
         return lower
 
     def _box_item_values(self, key, values):
@@ -7180,8 +7190,7 @@ def asof(self, where, subset=None):
 
         # mask the missing
         missing = locs == -1
-        d = self.take(locs)
-        data = d.copy()
+        data = self.take(locs)
         data.index = where
         data.loc[missing] = np.nan
         return data if is_list else data.iloc[-1]
@@ -7727,7 +7736,7 @@ def at_time(
         except AttributeError:
             raise TypeError("Index must be DatetimeIndex")
 
-        return self.take(indexer, axis=axis)
+        return self._take_with_is_copy(indexer, axis=axis)
 
     def between_time(
         self: FrameOrSeries,
@@ -7809,7 +7818,7 @@ def between_time(
         except AttributeError:
             raise TypeError("Index must be DatetimeIndex")
 
-        return self.take(indexer, axis=axis)
+        return self._take_with_is_copy(indexer, axis=axis)
 
     def resample(
         self,

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -685,7 +685,7 @@ def get_group(self, name, obj=None):
         if not len(inds):
             raise KeyError(name)
 
-        return obj.take(inds, axis=self.axis)
+        return obj._take_with_is_copy(inds, axis=self.axis)
 
     def __iter__(self):
         """

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1588,7 +1588,7 @@ def _getitem_iterable(self, key, axis: int):
             # A boolean indexer
             key = check_bool_indexer(labels, key)
             (inds,) = key.nonzero()
-            return self.obj.take(inds, axis=axis)
+            return self.obj._take_with_is_copy(inds, axis=axis)
         else:
             # A collection of keys
             keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
@@ -1780,7 +1780,7 @@ def _getbool_axis(self, key, axis: int):
         labels = self.obj._get_axis(axis)
         key = check_bool_indexer(labels, key)
         inds = key.nonzero()[0]
-        return self.obj.take(inds, axis=axis)
+        return self.obj._take_with_is_copy(inds, axis=axis)
 
     def _get_slice_axis(self, slice_obj: slice, axis: int):
         """
@@ -1801,7 +1801,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int):
         else:
             # DatetimeIndex overrides Index.slice_indexer and may
             #  return a DatetimeIndex instead of a slice object.
-            return self.obj.take(indexer, axis=axis)
+            return self.obj._take_with_is_copy(indexer, axis=axis)
 
 
 @Appender(IndexingMixin.loc.__doc__)
@@ -2107,7 +2107,7 @@ def _get_list_axis(self, key, axis: int):
         `axis` can only be zero.
         """
         try:
-            return self.obj.take(key, axis=axis)
+            return self.obj._take_with_is_copy(key, axis=axis)
         except IndexError:
             # re-raise with different error message
             raise IndexError("positional indexers are out-of-bounds")

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -804,7 +804,14 @@ def axes(self):
     # Indexing Methods
 
     @Appender(generic.NDFrame.take.__doc__)
-    def take(self, indices, axis=0, is_copy=False, **kwargs):
+    def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
+        if is_copy is not None:
+            warnings.warn(
+                "is_copy is deprecated and will be removed in a future version. "
+                "'take' always returns a copy, so there is no need to specify this.",
+                FutureWarning,
+                stacklevel=2,
+            )
         nv.validate_take(tuple(), kwargs)
 
         indices = ensure_platform_int(indices)
@@ -819,16 +826,20 @@ def take(self, indices, axis=0, is_copy=False, **kwargs):
             kwargs = {}
         new_values = self._values.take(indices, **kwargs)
 
-        result = self._constructor(
+        return self._constructor(
             new_values, index=new_index, fastpath=True
         ).__finalize__(self)
 
-        # Maybe set copy if we didn't actually change the index.
-        if is_copy:
-            if not result._get_axis(axis).equals(self._get_axis(axis)):
-                result._set_is_copy(self)
+    def _take_with_is_copy(self, indices, axis=0, **kwargs):
+        """
+        Internal version of the `take` method that sets the `_is_copy`
+        attribute to keep track of the parent dataframe (using in indexing
+        for the SettingWithCopyWarning). For Series this does the same
+        as the public take (it never sets `_is_copy`).
 
-        return result
+        See the docstring of `take` for full explanation of the parameters.
+        """
+        return self.take(indices=indices, axis=axis, **kwargs)
 
     def _ixs(self, i: int, axis: int = 0):
         """

diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py
@@ -143,3 +143,16 @@ def test_time_zone_aware_index(self, stamp, expected):
 
         result = df.asof(stamp)
         tm.assert_series_equal(result, expected)
+
+    def test_is_copy(self, date_range_frame):
+        # GH-27357, GH-30784: ensure the result of asof is an actual copy and
+        # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
+        df = date_range_frame
+        N = 50
+        df.loc[15:30, "A"] = np.nan
+        dates = date_range("1/1/1990", periods=N * 3, freq="25s")
+
+        result = df.asof(dates)
+
+        with tm.assert_produces_warning(None):
+            result["C"] = 1
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
@@ -446,6 +446,15 @@ def test_sample_upsampling_without_replacement(self):
         with pytest.raises(ValueError, match=msg):
             df.sample(frac=2, replace=False)
 
+    def test_sample_is_copy(self):
+        # GH-27357, GH-30784: ensure the result of sample is an actual copy and
+        # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
+        df = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
+        df2 = df.sample(3)
+
+        with tm.assert_produces_warning(None):
+            df2["d"] = 1
+
     def test_size_compat(self):
         # GH8846
         # size property should be defined
@@ -820,18 +829,23 @@ def test_take_invalid_kwargs(self):
             with pytest.raises(ValueError, match=msg):
                 obj.take(indices, mode="clip")
 
-    def test_depr_take_kwarg_is_copy(self):
+    @pytest.mark.parametrize("is_copy", [True, False])
+    def test_depr_take_kwarg_is_copy(self, is_copy):
         # GH 27357
         df = DataFrame({"A": [1, 2, 3]})
         msg = (
             "is_copy is deprecated and will be removed in a future version. "
-            "take will always return a copy in the future."
+            "'take' always returns a copy, so there is no need to specify this."
         )
         with tm.assert_produces_warning(FutureWarning) as w:
-            df.take([0, 1], is_copy=True)
+            df.take([0, 1], is_copy=is_copy)
 
         assert w[0].message.args[0] == msg
 
+        s = Series([1, 2, 3])
+        with tm.assert_produces_warning(FutureWarning):
+            s.take([0, 1], is_copy=is_copy)
+
     def test_equals(self):
         s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
         s2 = s1.copy()