From 8de4ffa0855f5d711ad2b476493a9f9ac449f449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Sat, 27 May 2023 19:38:15 +0200 Subject: [PATCH 1/8] Deprecating first() --- pandas/core/generic.py | 6 ++ .../frame/methods/test_first_and_last.py | 70 +++++++++++-------- 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bcfbfa1a2b713..d9168b7387b53 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9162,6 +9162,12 @@ def first(self, offset) -> Self: 3 days observed in the dataset, and therefore data for 2018-04-13 was not returned. """ + # GH45908 & GH#52487 + warnings.warn( + "first is deprecated and will be removed in a future version", + FutureWarning, + stacklevel=find_stack_level(), + ) if not isinstance(self.index, DatetimeIndex): raise TypeError("'first' only supports a DatetimeIndex index") diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 64f6665ecd709..c15bd9b64a80e 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -10,29 +10,33 @@ ) import pandas._testing as tm +deprecated_msg = "first is deprecated" + class TestFirst: def test_first_subset(self, frame_or_series): ts = tm.makeTimeDataFrame(freq="12h") ts = tm.get_obj(ts, frame_or_series) - result = ts.first("10d") - assert len(result) == 20 + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): + result = ts.first("10d") - ts = tm.makeTimeDataFrame(freq="D") - ts = tm.get_obj(ts, frame_or_series) - result = ts.first("10d") - assert len(result) == 10 + assert len(result) == 20 - result = ts.first("3M") - expected = ts[:"3/31/2000"] - tm.assert_equal(result, expected) + ts = tm.makeTimeDataFrame(freq="D") + ts = tm.get_obj(ts, frame_or_series) + result = ts.first("10d") + assert len(result) == 10 - result = ts.first("21D") - expected = ts[:21] - tm.assert_equal(result, expected) + result = ts.first("3M") + expected = ts[:"3/31/2000"] + tm.assert_equal(result, expected) - result = ts[:0].first("3M") - tm.assert_equal(result, ts[:0]) + result = ts.first("21D") + expected = ts[:21] + tm.assert_equal(result, expected) + + result = ts[:0].first("3M") + tm.assert_equal(result, ts[:0]) def test_first_last_raises(self, frame_or_series): # GH#20725 @@ -40,8 +44,9 @@ def test_first_last_raises(self, frame_or_series): obj = tm.get_obj(obj, frame_or_series) msg = "'first' only supports a DatetimeIndex index" - with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - obj.first("1D") + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.first("1D") msg = "'last' only supports a DatetimeIndex index" with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex @@ -73,25 +78,30 @@ def test_last_subset(self, frame_or_series): def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods): # GH#29623 x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) - result = x.first("1M") - expected = frame_or_series( - [1] * periods, index=bdate_range(start, periods=periods) - ) - tm.assert_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): + result = x.first("1M") + expected = frame_or_series( + [1] * periods, index=bdate_range(start, periods=periods) + ) + tm.assert_equal(result, expected) def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): # GH#29623 x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) - result = x.first("2M") - expected = frame_or_series( - [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") - ) - tm.assert_equal(result, expected) - - @pytest.mark.parametrize("func", ["first", "last"]) - def test_empty_not_input(self, func): + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): + result = x.first("2M") + expected = frame_or_series( + [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") + ) + tm.assert_equal(result, expected) + + def test_empty_not_input(self): # GH#51032 df = DataFrame(index=pd.DatetimeIndex([])) - result = getattr(df, func)(offset=1) + result = getattr(df, "last")(offset=1) + + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): + result = getattr(df, "first")(offset=1) + tm.assert_frame_equal(df, result) assert df is not result From 96ce1156d15b922870ba118a09bc9874e73c8b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Mon, 29 May 2023 15:28:32 +0200 Subject: [PATCH 2/8] Added requested changes --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/generic.py | 4 +- .../frame/methods/test_first_and_last.py | 44 ++++++++++++------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 2c5263f447951..8795ded0fb25e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -269,7 +269,7 @@ Deprecations - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) -- +- Deprecated :meth:`Series.first` and :meth:`DataFrame.fist` (:issue:`45908`), create a mask and filter using ``.loc`` instead .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d9168b7387b53..06b5e339e96d1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9162,9 +9162,9 @@ def first(self, offset) -> Self: 3 days observed in the dataset, and therefore data for 2018-04-13 was not returned. """ - # GH45908 & GH#52487 warnings.warn( - "first is deprecated and will be removed in a future version", + "first is deprecated and will be removed in a future version" + "please create a mask and filter using `.loc` instead", FutureWarning, stacklevel=find_stack_level(), ) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index c15bd9b64a80e..3bf4bceca003e 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -19,22 +19,31 @@ def test_first_subset(self, frame_or_series): ts = tm.get_obj(ts, frame_or_series) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("10d") - assert len(result) == 20 - ts = tm.makeTimeDataFrame(freq="D") - ts = tm.get_obj(ts, frame_or_series) + ts = tm.makeTimeDataFrame(freq="D") + ts = tm.get_obj(ts, frame_or_series) + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("10d") assert len(result) == 10 + ts = tm.makeTimeDataFrame(freq="D") + ts = tm.get_obj(ts, frame_or_series) + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("3M") expected = ts[:"3/31/2000"] tm.assert_equal(result, expected) + ts = tm.makeTimeDataFrame(freq="D") + ts = tm.get_obj(ts, frame_or_series) + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("21D") expected = ts[:21] tm.assert_equal(result, expected) + ts = tm.makeTimeDataFrame(freq="D") + ts = tm.get_obj(ts, frame_or_series) + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts[:0].first("3M") tm.assert_equal(result, ts[:0]) @@ -44,9 +53,12 @@ def test_first_last_raises(self, frame_or_series): obj = tm.get_obj(obj, frame_or_series) msg = "'first' only supports a DatetimeIndex index" - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - obj.first("1D") + with tm.assert_produces_warning( + FutureWarning, match=deprecated_msg + ), pytest.raises( + TypeError, match=msg + ): # index is not a DatetimeIndex + obj.first("1D") msg = "'last' only supports a DatetimeIndex index" with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex @@ -80,28 +92,28 @@ def test_first_with_first_day_last_of_month(self, frame_or_series, start, period x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = x.first("1M") - expected = frame_or_series( - [1] * periods, index=bdate_range(start, periods=periods) - ) - tm.assert_equal(result, expected) + expected = frame_or_series( + [1] * periods, index=bdate_range(start, periods=periods) + ) + tm.assert_equal(result, expected) def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): # GH#29623 x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = x.first("2M") - expected = frame_or_series( - [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") - ) - tm.assert_equal(result, expected) + expected = frame_or_series( + [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") + ) + tm.assert_equal(result, expected) def test_empty_not_input(self): # GH#51032 df = DataFrame(index=pd.DatetimeIndex([])) - result = getattr(df, "last")(offset=1) + result = df.last(offset=1) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = getattr(df, "first")(offset=1) + result = df.first(offset=1) tm.assert_frame_equal(df, result) assert df is not result From 75ee3b1e8da2d538654f053dc2f50868de63eb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Mon, 29 May 2023 16:17:44 +0200 Subject: [PATCH 3/8] removed repeated ts --- pandas/tests/frame/methods/test_first_and_last.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 3bf4bceca003e..18173f7c66198 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -27,22 +27,16 @@ def test_first_subset(self, frame_or_series): result = ts.first("10d") assert len(result) == 10 - ts = tm.makeTimeDataFrame(freq="D") - ts = tm.get_obj(ts, frame_or_series) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("3M") expected = ts[:"3/31/2000"] tm.assert_equal(result, expected) - ts = tm.makeTimeDataFrame(freq="D") - ts = tm.get_obj(ts, frame_or_series) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts.first("21D") expected = ts[:21] tm.assert_equal(result, expected) - ts = tm.makeTimeDataFrame(freq="D") - ts = tm.get_obj(ts, frame_or_series) with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): result = ts[:0].first("3M") tm.assert_equal(result, ts[:0]) From 6cd46c870609700d24a14d50381183460e3d1138 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 29 May 2023 15:39:10 +0100 Subject: [PATCH 4/8] Update doc/source/whatsnew/v2.1.0.rst --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 8795ded0fb25e..e8183e9bfc2ce 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -269,7 +269,7 @@ Deprecations - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) -- Deprecated :meth:`Series.first` and :meth:`DataFrame.fist` (:issue:`45908`), create a mask and filter using ``.loc`` instead +- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`) .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: From b8e30b451f86997125403944c4b4ca1a409c1c2c Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 29 May 2023 15:39:19 +0100 Subject: [PATCH 5/8] Update pandas/core/generic.py --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 06b5e339e96d1..1adc331e3cd50 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9163,8 +9163,8 @@ def first(self, offset) -> Self: not returned. """ warnings.warn( - "first is deprecated and will be removed in a future version" - "please create a mask and filter using `.loc` instead", + "first is deprecated and will be removed in a future version. " + "Please create a mask and filter using `.loc` instead", FutureWarning, stacklevel=find_stack_level(), ) From 51e75fe57b9e7d89fc465cf41abae94d1dfb24dd Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 29 May 2023 15:24:26 +0100 Subject: [PATCH 6/8] pre-commit --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e8183e9bfc2ce..52f016fbc12fa 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -264,12 +264,12 @@ Deprecations - Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`) - Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`) - Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`) +- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`) - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`) - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) -- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`) .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: From 8fe71ffd5b0637514bc6280afe8bd2a7568cb749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Tue, 30 May 2023 10:53:04 +0200 Subject: [PATCH 7/8] Separated test for first() on test_finalize.py --- pandas/tests/generic/test_finalize.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 8159024966b0f..9dfa2c8a5a90a 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -8,6 +8,7 @@ import pytest import pandas as pd +import pandas._testing as tm # TODO: # * Binary methods (mul, div, etc.) @@ -333,16 +334,6 @@ ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), operator.methodcaller("between_time", "12:00", "13:00"), ), - ( - pd.Series, - (1, pd.date_range("2000", periods=4)), - operator.methodcaller("first", "3D"), - ), - ( - pd.DataFrame, - ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - operator.methodcaller("first", "3D"), - ), ( pd.Series, (1, pd.date_range("2000", periods=4)), @@ -451,6 +442,22 @@ def test_finalize_called(ndframe_method): assert result.attrs == {"a": 1} +@pytest.mark.parametrize( + "data", + [ + pd.Series(1, pd.date_range("2000", periods=4)), + pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), + ], +) +def test_finalize_first(data): + deprecated_msg = "first is deprecated" + + data.attrs = {"a": 1} + with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): + result = data.first("3D") + assert result.attrs == {"a": 1} + + @not_implemented_mark def test_finalize_called_eval_numexpr(): pytest.importorskip("numexpr") From e195758c3e1f398405d84e4a29729de4f8b8304f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dea=20Mar=C3=ADa=20L=C3=A9on?= Date: Tue, 30 May 2023 20:01:27 +0200 Subject: [PATCH 8/8] Avoiding docstring on CI --- pandas/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index 077939d2d05ce..7dab1714e0aa3 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -145,6 +145,11 @@ def pytest_collection_modifyitems(items, config) -> None: "(Series|DataFrame).bool is now deprecated and will be removed " "in future version of pandas", ), + ( + "pandas.core.generic.NDFrame.first", + "first is deprecated and will be removed in a future version. " + "Please create a mask and filter using `.loc` instead", + ), ] for item in items: