From 1e1b4f15a7341387dd77b931ae610ebaadcccecf Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Thu, 2 Jan 2020 04:27:12 +0000 Subject: [PATCH 01/12] deprecate is_copy argument of take and remove is_copy=False usage --- pandas/core/generic.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3b8e9cf82f08c..35267ee9f6598 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3275,7 +3275,7 @@ def _clear_item_cache(self) -> None: # Indexing Methods def take( - self: FrameOrSeries, indices, axis=0, is_copy: bool_t = True, **kwargs + self: FrameOrSeries, indices, axis=0, is_copy: bool_t = None, **kwargs ) -> FrameOrSeries: """ Return the elements in the given *positional* indices along an axis. @@ -3293,6 +3293,8 @@ def take( selecting rows, ``1`` means that we are selecting columns. is_copy : bool, default True Whether to return a copy of the original object or not. + + .. deprecated:: 1.0.0 **kwargs For compatibility with :meth:`numpy.take`. Has no effect on the output. @@ -3361,9 +3363,17 @@ class max_speed result = self._constructor(new_data).__finalize__(self) # Maybe set copy if we didn't actually change the index. - if is_copy: + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. " + "take will always return a copy in the future.", + FutureWarning, + stacklevel=2, + ) if not result._get_axis(axis).equals(self._get_axis(axis)): result._set_is_copy(self) + else: + is_copy = True return result @@ -5014,7 +5024,7 @@ def sample( ) locs = rs.choice(axis_length, size=n, replace=replace, p=weights) - return self.take(locs, axis=axis, is_copy=False) + return self.take(locs, axis=axis) _shared_docs[ "pipe" @@ -7011,7 +7021,7 @@ def asof(self, where, subset=None): # mask the missing missing = locs == -1 - data = self.take(locs, is_copy=False) + data = self.take(locs) data.index = where data.loc[missing] = np.nan return data if is_list else data.iloc[-1] From 17ca68620a7242f5f4592bb19ddfc2d9b41caad3 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Thu, 2 Jan 2020 05:03:16 +0000 Subject: [PATCH 02/12] add test for depr is_copy --- pandas/tests/generic/test_generic.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index cdb79dc6606ff..10a1e09a09bf8 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -820,6 +820,18 @@ def test_take_invalid_kwargs(self): with pytest.raises(ValueError, match=msg): obj.take(indices, mode="clip") + def test_depr_take_kwarg_is_copy(self): + # GH 27357 + df = DataFrame({"A": [1, 2, 3]}) + msg = ( + "is_copy is deprecated and will be removed in a future version. " + "take will always return a copy in the future." + ) + with tm.assert_produces_warning(FutureWarning) as w: + df.take([0, 1], is_copy=True) + + assert w[0].message.args[0] == msg + def test_equals(self): s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) s2 = s1.copy() From 9bd696ce833e67395574901a234c532f47a4bb60 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Thu, 2 Jan 2020 05:18:44 +0000 Subject: [PATCH 03/12] add whats new note --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 014bd22aa2dab..9213de5bbd1dd 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -578,6 +578,7 @@ Deprecations - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) - The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`). +- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`) **Selecting Columns from a Grouped DataFrame** From dabe0517018bb82307d22cfacf747495fb45d7f1 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Fri, 3 Jan 2020 00:26:32 +0000 Subject: [PATCH 04/12] Fix condition for is_copy to avoid breaking other tests --- pandas/core/generic.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35267ee9f6598..256719d6ff0b5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3353,6 +3353,16 @@ class max_speed 1 monkey mammal NaN 3 lion mammal 80.5 """ + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. " + "take will always return a copy in the future.", + FutureWarning, + stacklevel=2, + ) + else: + is_copy = True + nv.validate_take(tuple(), kwargs) self._consolidate_inplace() @@ -3363,17 +3373,9 @@ class max_speed result = self._constructor(new_data).__finalize__(self) # Maybe set copy if we didn't actually change the index. - if is_copy is not None: - warnings.warn( - "is_copy is deprecated and will be removed in a future version. " - "take will always return a copy in the future.", - FutureWarning, - stacklevel=2, - ) + if is_copy: if not result._get_axis(axis).equals(self._get_axis(axis)): result._set_is_copy(self) - else: - is_copy = True return result @@ -5024,7 +5026,7 @@ def sample( ) locs = rs.choice(axis_length, size=n, replace=replace, p=weights) - return self.take(locs, axis=axis) + return self.take(locs, is_copy=False, axis=axis) _shared_docs[ "pipe" @@ -7021,7 +7023,7 @@ def asof(self, where, subset=None): # mask the missing missing = locs == -1 - data = self.take(locs) + data = self.take(locs, is_copy=False) data.index = where data.loc[missing] = np.nan return data if is_list else data.iloc[-1] From 8142050b01b9bd54fb9e59937ddc1b0501b2c909 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Fri, 3 Jan 2020 01:26:19 +0000 Subject: [PATCH 05/12] linting --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 256719d6ff0b5..fe2c897478167 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5026,7 +5026,7 @@ def sample( ) locs = rs.choice(axis_length, size=n, replace=replace, p=weights) - return self.take(locs, is_copy=False, axis=axis) + return self.take(locs, axis=axis, is_copy=False) _shared_docs[ "pipe" From 277d9251bb9857de6c67cac32af03e4e22b1c522 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Fri, 3 Jan 2020 02:01:31 +0000 Subject: [PATCH 06/12] correct typing for is_copy arg --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fe2c897478167..5486a515daace 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3275,7 +3275,7 @@ def _clear_item_cache(self) -> None: # Indexing Methods def take( - self: FrameOrSeries, indices, axis=0, is_copy: bool_t = None, **kwargs + self: FrameOrSeries, indices, axis=0, is_copy: Optional[bool_t] = None, **kwargs ) -> FrameOrSeries: """ Return the elements in the given *positional* indices along an axis. From d8db903c1b370ada2e8f71197b3fe9634460dba7 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Fri, 3 Jan 2020 03:16:36 +0000 Subject: [PATCH 07/12] Fix CI errors --- pandas/core/groupby/grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 7e7261130ff4a..5436101540c55 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -194,7 +194,7 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): # use stable sort to support first, last, nth indexer = self.indexer = ax.argsort(kind="mergesort") ax = ax.take(indexer) - obj = obj.take(indexer, axis=self.axis, is_copy=False) + obj = obj.take(indexer, axis=self.axis) self.obj = obj self.grouper = ax From 88dfc503257389186c44870e21ed827826dc0914 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 4 Jan 2020 04:57:50 +0000 Subject: [PATCH 08/12] catch warnings in test_asof --- pandas/core/generic.py | 2 +- pandas/tests/frame/methods/test_asof.py | 84 +++++++++++++++---------- 2 files changed, 53 insertions(+), 33 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5486a515daace..54d78e5d8e57c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5026,7 +5026,7 @@ def sample( ) locs = rs.choice(axis_length, size=n, replace=replace, p=weights) - return self.take(locs, axis=axis, is_copy=False) + return self.take(locs, axis=axis) _shared_docs[ "pipe" diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 055f81c9942a6..57671125033b1 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -2,8 +2,15 @@ import pytest from pandas import DataFrame, Series, Timestamp, date_range, to_datetime + import pandas._testing as tm +import pandas.util.testing as tm +import warnings + +warnings.simplefilter("error") + + @pytest.fixture def date_range_frame(): @@ -24,14 +31,18 @@ def test_basic(self, date_range_frame): df.loc[15:30, "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") - result = df.asof(dates) - assert result.notna().all(1).all() - lb = df.index[14] - ub = df.index[30] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + + result = df.asof(dates) + assert result.notna().all(1).all() + lb = df.index[14] + ub = df.index[30] - dates = list(dates) - result = df.asof(dates) - assert result.notna().all(1).all() + dates = list(dates) + + result = df.asof(dates) + assert result.notna().all(1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] @@ -43,40 +54,46 @@ def test_subset(self, date_range_frame): df.loc[4:8, "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") - # with a subset of A should be the same - result = df.asof(dates, subset="A") - expected = df.asof(dates) - tm.assert_frame_equal(result, expected) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) - # same with A/B - result = df.asof(dates, subset=["A", "B"]) - expected = df.asof(dates) - tm.assert_frame_equal(result, expected) + # with a subset of A should be the same + result = df.asof(dates, subset="A") + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) - # B gives df.asof - result = df.asof(dates, subset="B") - expected = df.resample("25s", closed="right").ffill().reindex(dates) - expected.iloc[20:] = 9 + # same with A/B + result = df.asof(dates, subset=["A", "B"]) + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, expected) + # B gives df.asof + result = df.asof(dates, subset="B") + expected = df.resample("25s", closed="right").ffill().reindex(dates) + expected.iloc[20:] = 9 + + tm.assert_frame_equal(result, expected) def test_missing(self, date_range_frame): # GH 15118 # no match found - `where` value before earliest date in index N = 10 df = date_range_frame.iloc[:N].copy() - result = df.asof("1989-12-31") + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) - expected = Series( - index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 - ) - tm.assert_series_equal(result, expected) + result = df.asof("1989-12-31") - result = df.asof(to_datetime(["1989-12-31"])) - expected = DataFrame( - index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64" - ) - tm.assert_frame_equal(result, expected) + expected = Series( + index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 + ) + tm.assert_series_equal(result, expected) + + result = df.asof(to_datetime(["1989-12-31"])) + expected = DataFrame( + index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64" + ) + tm.assert_frame_equal(result, expected) def test_all_nans(self, date_range_frame): # GH 15713 @@ -132,5 +149,8 @@ def test_time_zone_aware_index(self, stamp, expected): Timestamp("2018-01-01 22:35:10.550+00:00"), ], ) - result = df.asof(stamp) - tm.assert_series_equal(result, expected) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + + result = df.asof(stamp) + tm.assert_series_equal(result, expected) From 7c533a1734731d6f72dbb6c44d1fb464a2243e0c Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sat, 4 Jan 2020 05:10:03 +0000 Subject: [PATCH 09/12] fix merge conflict and linting --- pandas/tests/frame/methods/test_asof.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 57671125033b1..e2ba084b689e7 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -1,16 +1,11 @@ +import warnings + import numpy as np import pytest from pandas import DataFrame, Series, Timestamp, date_range, to_datetime - import pandas._testing as tm -import pandas.util.testing as tm -import warnings - -warnings.simplefilter("error") - - @pytest.fixture def date_range_frame(): From 87b1ba5618ae295623aa98816bc8214aa3f571ad Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sun, 5 Jan 2020 03:14:03 +0000 Subject: [PATCH 10/12] revert warnings filtering in tests --- pandas/tests/frame/methods/test_asof.py | 76 +++++++++++-------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index e2ba084b689e7..774369794cb90 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -1,5 +1,3 @@ -import warnings - import numpy as np import pytest @@ -26,18 +24,15 @@ def test_basic(self, date_range_frame): df.loc[15:30, "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") - with warnings.catch_warnings(): - warnings.simplefilter("ignore", FutureWarning) - - result = df.asof(dates) - assert result.notna().all(1).all() - lb = df.index[14] - ub = df.index[30] + result = df.asof(dates) + assert result.notna().all(1).all() + lb = df.index[14] + ub = df.index[30] - dates = list(dates) + dates = list(dates) - result = df.asof(dates) - assert result.notna().all(1).all() + result = df.asof(dates) + assert result.notna().all(1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] @@ -49,46 +44,41 @@ def test_subset(self, date_range_frame): df.loc[4:8, "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") - with warnings.catch_warnings(): - warnings.simplefilter("ignore", FutureWarning) - - # with a subset of A should be the same - result = df.asof(dates, subset="A") - expected = df.asof(dates) - tm.assert_frame_equal(result, expected) + # with a subset of A should be the same + result = df.asof(dates, subset="A") + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) - # same with A/B - result = df.asof(dates, subset=["A", "B"]) - expected = df.asof(dates) - tm.assert_frame_equal(result, expected) + # same with A/B + result = df.asof(dates, subset=["A", "B"]) + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) - # B gives df.asof - result = df.asof(dates, subset="B") - expected = df.resample("25s", closed="right").ffill().reindex(dates) - expected.iloc[20:] = 9 + # B gives df.asof + result = df.asof(dates, subset="B") + expected = df.resample("25s", closed="right").ffill().reindex(dates) + expected.iloc[20:] = 9 - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_missing(self, date_range_frame): # GH 15118 # no match found - `where` value before earliest date in index N = 10 df = date_range_frame.iloc[:N].copy() - with warnings.catch_warnings(): - warnings.simplefilter("ignore", FutureWarning) - result = df.asof("1989-12-31") + result = df.asof("1989-12-31") - expected = Series( - index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 - ) - tm.assert_series_equal(result, expected) + expected = Series( + index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 + ) + tm.assert_series_equal(result, expected) - result = df.asof(to_datetime(["1989-12-31"])) - expected = DataFrame( - index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64" - ) - tm.assert_frame_equal(result, expected) + result = df.asof(to_datetime(["1989-12-31"])) + expected = DataFrame( + index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64" + ) + tm.assert_frame_equal(result, expected) def test_all_nans(self, date_range_frame): # GH 15713 @@ -144,8 +134,6 @@ def test_time_zone_aware_index(self, stamp, expected): Timestamp("2018-01-01 22:35:10.550+00:00"), ], ) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", FutureWarning) - result = df.asof(stamp) - tm.assert_series_equal(result, expected) + result = df.asof(stamp) + tm.assert_series_equal(result, expected) From 6c1d495fca3db0666eac9c516c170aed470bc985 Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Sun, 5 Jan 2020 03:14:47 +0000 Subject: [PATCH 11/12] switch to is_copy default None in asof --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 54d78e5d8e57c..bf183523ba358 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7023,7 +7023,8 @@ def asof(self, where, subset=None): # mask the missing missing = locs == -1 - data = self.take(locs, is_copy=False) + d = self.take(locs) + data = d.copy() data.index = where data.loc[missing] = np.nan return data if is_list else data.iloc[-1] From e914148c91c28255c723c0c3d9bb6a1922cbbe3e Mon Sep 17 00:00:00 2001 From: Ryan Nazareth Date: Mon, 6 Jan 2020 02:07:51 +0000 Subject: [PATCH 12/12] remove blank line --- doc/source/whatsnew/v1.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 202cd10ad0971..3f94894e1da3c 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -581,7 +581,6 @@ Deprecations - ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) - The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`) - **Selecting Columns from a Grouped DataFrame** When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated,