diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 1d9019ff22c23..1ade30faa123b 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -186,11 +186,9 @@ Reindexing / selection / label manipulation DataFrame.duplicated DataFrame.equals DataFrame.filter - DataFrame.first DataFrame.head DataFrame.idxmax DataFrame.idxmin - DataFrame.last DataFrame.reindex DataFrame.reindex_like DataFrame.rename diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index a4ea0ec396ceb..28e7cf82b3478 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -183,12 +183,10 @@ Reindexing / selection / label manipulation Series.drop_duplicates Series.duplicated Series.equals - Series.first Series.head Series.idxmax Series.idxmin Series.isin - Series.last Series.reindex Series.reindex_like Series.rename diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 806a46c248e15..768bb9e99407a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,13 +102,13 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first` and :meth:`Series.last` (:issue:`53710`) - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) - Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) - Removed the ``ArrayManager`` (:issue:`55043`) -- .. 
--------------------------------------------------------------------------- .. _whatsnew_300.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 490a47d16871c..676b3741f9843 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -36,7 +36,6 @@ from pandas._libs.lib import is_range_indexer from pandas._libs.tslibs import ( Period, - Tick, Timestamp, to_offset, ) @@ -9646,169 +9645,6 @@ def resample( group_keys=group_keys, ) - @final - def first(self, offset) -> Self: - """ - Select initial periods of time series data based on a date offset. - - .. deprecated:: 2.1 - :meth:`.first` is deprecated and will be removed in a future version. - Please create a mask and filter using `.loc` instead. - - For a DataFrame with a sorted DatetimeIndex, this function can - select the first few rows based on a date offset. - - Parameters - ---------- - offset : str, DateOffset or dateutil.relativedelta - The offset length of the data that will be selected. For instance, - '1ME' will display all the rows having their index within the first month. - - Returns - ------- - Series or DataFrame - A subset of the caller. - - Raises - ------ - TypeError - If the index is not a :class:`DatetimeIndex` - - See Also - -------- - last : Select final periods of time series based on a date offset. - at_time : Select values at a particular time of the day. - between_time : Select values between particular times of the day. - - Examples - -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') - >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) - >>> ts - A - 2018-04-09 1 - 2018-04-11 2 - 2018-04-13 3 - 2018-04-15 4 - - Get the rows for the first 3 days: - - >>> ts.first('3D') - A - 2018-04-09 1 - 2018-04-11 2 - - Notice the data for 3 first calendar days were returned, not the first - 3 days observed in the dataset, and therefore data for 2018-04-13 was - not returned. 
- """ - warnings.warn( - "first is deprecated and will be removed in a future version. " - "Please create a mask and filter using `.loc` instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - if not isinstance(self.index, DatetimeIndex): - raise TypeError("'first' only supports a DatetimeIndex index") - - if len(self.index) == 0: - return self.copy(deep=False) - - offset = to_offset(offset) - if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): - # GH#29623 if first value is end of period, remove offset with n = 1 - # before adding the real offset - end_date = end = self.index[0] - offset.base + offset - else: - end_date = end = self.index[0] + offset - - # Tick-like, e.g. 3 weeks - if isinstance(offset, Tick) and end_date in self.index: - end = self.index.searchsorted(end_date, side="left") - return self.iloc[:end] - - return self.loc[:end] - - @final - def last(self, offset) -> Self: - """ - Select final periods of time series data based on a date offset. - - .. deprecated:: 2.1 - :meth:`.last` is deprecated and will be removed in a future version. - Please create a mask and filter using `.loc` instead. - - For a DataFrame with a sorted DatetimeIndex, this function - selects the last few rows based on a date offset. - - Parameters - ---------- - offset : str, DateOffset, dateutil.relativedelta - The offset length of the data that will be selected. For instance, - '3D' will display all the rows having their index within the last 3 days. - - Returns - ------- - Series or DataFrame - A subset of the caller. - - Raises - ------ - TypeError - If the index is not a :class:`DatetimeIndex` - - See Also - -------- - first : Select initial periods of time series based on a date offset. - at_time : Select values at a particular time of the day. - between_time : Select values between particular times of the day. - - Notes - ----- - .. 
deprecated:: 2.1.0 - Please create a mask and filter using `.loc` instead - - Examples - -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') - >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) - >>> ts - A - 2018-04-09 1 - 2018-04-11 2 - 2018-04-13 3 - 2018-04-15 4 - - Get the rows for the last 3 days: - - >>> ts.last('3D') # doctest: +SKIP - A - 2018-04-13 3 - 2018-04-15 4 - - Notice the data for 3 last calendar days were returned, not the last - 3 observed days in the dataset, and therefore data for 2018-04-11 was - not returned. - """ - warnings.warn( - "last is deprecated and will be removed in a future version. " - "Please create a mask and filter using `.loc` instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - - if not isinstance(self.index, DatetimeIndex): - raise TypeError("'last' only supports a DatetimeIndex index") - - if len(self.index) == 0: - return self.copy(deep=False) - - offset = to_offset(offset) - - start_date = self.index[-1] - offset - start = self.index.searchsorted(start_date, side="right") - return self.iloc[start:] - @final def rank( self, diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py deleted file mode 100644 index 2170cf254fbe6..0000000000000 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ /dev/null @@ -1,139 +0,0 @@ -""" -Note: includes tests for `last` -""" -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - Index, - bdate_range, - date_range, -) -import pandas._testing as tm - -deprecated_msg = "first is deprecated" -last_deprecated_msg = "last is deprecated" - - -class TestFirst: - def test_first_subset(self, frame_or_series): - ts = DataFrame( - np.random.default_rng(2).standard_normal((100, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=100, freq="12h"), - ) - ts = tm.get_obj(ts, frame_or_series) - with 
tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("10d") - assert len(result) == 20 - - ts = DataFrame( - np.random.default_rng(2).standard_normal((100, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=100, freq="D"), - ) - ts = tm.get_obj(ts, frame_or_series) - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("10d") - assert len(result) == 10 - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("3ME") - expected = ts[:"3/31/2000"] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("21D") - expected = ts[:21] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts[:0].first("3ME") - tm.assert_equal(result, ts[:0]) - - def test_first_last_raises(self, frame_or_series): - # GH#20725 - obj = DataFrame([[1, 2, 3], [4, 5, 6]]) - obj = tm.get_obj(obj, frame_or_series) - - msg = "'first' only supports a DatetimeIndex index" - with tm.assert_produces_warning( - FutureWarning, match=deprecated_msg - ), pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - obj.first("1D") - - msg = "'last' only supports a DatetimeIndex index" - with tm.assert_produces_warning( - FutureWarning, match=last_deprecated_msg - ), pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - obj.last("1D") - - def test_last_subset(self, frame_or_series): - ts = DataFrame( - np.random.default_rng(2).standard_normal((100, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=100, freq="12h"), - ) - ts = tm.get_obj(ts, frame_or_series) - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("10d") - assert len(result) == 20 - - ts = DataFrame( - np.random.default_rng(2).standard_normal((30, 4)), - 
columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=30, freq="D"), - ) - ts = tm.get_obj(ts, frame_or_series) - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("10d") - assert len(result) == 10 - - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("21D") - expected = ts["2000-01-10":] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("21D") - expected = ts[-21:] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts[:0].last("3ME") - tm.assert_equal(result, ts[:0]) - - @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) - def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods): - # GH#29623 - x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = x.first("1ME") - expected = frame_or_series( - [1] * periods, index=bdate_range(start, periods=periods) - ) - tm.assert_equal(result, expected) - - def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): - # GH#29623 - x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = x.first("2ME") - expected = frame_or_series( - [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") - ) - tm.assert_equal(result, expected) - - def test_empty_not_input(self): - # GH#51032 - df = DataFrame(index=pd.DatetimeIndex([])) - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = df.last(offset=1) - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = df.first(offset=1) - - tm.assert_frame_equal(df, result) - assert df is not 
result diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index f25e7d4ab8c79..7cf5ccc4ed24f 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -8,7 +8,6 @@ import pytest import pandas as pd -import pandas._testing as tm # TODO: # * Binary methods (mul, div, etc.) @@ -303,16 +302,6 @@ ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), operator.methodcaller("between_time", "12:00", "13:00"), ), - ( - pd.Series, - (1, pd.date_range("2000", periods=4)), - operator.methodcaller("last", "3D"), - ), - ( - pd.DataFrame, - ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - operator.methodcaller("last", "3D"), - ), (pd.Series, ([1, 2],), operator.methodcaller("rank")), (pd.DataFrame, frame_data, operator.methodcaller("rank")), (pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))), @@ -388,7 +377,6 @@ def idfn(x): @pytest.mark.filterwarnings( "ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning", - "ignore:last is deprecated:FutureWarning", ) @pytest.mark.parametrize("ndframe_method", _all_methods, ids=lambda x: idfn(x[-1])) def test_finalize_called(ndframe_method): @@ -401,39 +389,6 @@ def test_finalize_called(ndframe_method): assert result.attrs == {"a": 1} -@pytest.mark.parametrize( - "data", - [ - pd.Series(1, pd.date_range("2000", periods=4)), - pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - ], -) -def test_finalize_first(data): - deprecated_msg = "first is deprecated" - - data.attrs = {"a": 1} - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = data.first("3D") - assert result.attrs == {"a": 1} - - -@pytest.mark.parametrize( - "data", - [ - pd.Series(1, pd.date_range("2000", periods=4)), - pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - ], -) -def test_finalize_last(data): - # GH 53710 - deprecated_msg = "last is deprecated" - - data.attrs = {"a": 1} - 
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = data.last("3D") - assert result.attrs == {"a": 1} - - @not_implemented_mark def test_finalize_called_eval_numexpr(): pytest.importorskip("numexpr")