Skip to content

DEPR: Series.first() and DataFrame.first() #53419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
10 commits merged on Jun 1, 2023
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,12 @@ Deprecations
- Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
- Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
- Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_210.performance:
Expand Down
5 changes: 5 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ def pytest_collection_modifyitems(items, config) -> None:
"(Series|DataFrame).bool is now deprecated and will be removed "
"in future version of pandas",
),
(
"pandas.core.generic.NDFrame.first",
"first is deprecated and will be removed in a future version. "
"Please create a mask and filter using `.loc` instead",
),
]

for item in items:
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9162,6 +9162,12 @@ def first(self, offset) -> Self:
3 days observed in the dataset, and therefore data for 2018-04-13 was
not returned.
"""
warnings.warn(
"first is deprecated and will be removed in a future version. "
"Please create a mask and filter using `.loc` instead",
FutureWarning,
stacklevel=find_stack_level(),
)
if not isinstance(self.index, DatetimeIndex):
raise TypeError("'first' only supports a DatetimeIndex index")

Expand Down
52 changes: 34 additions & 18 deletions pandas/tests/frame/methods/test_first_and_last.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,48 @@
)
import pandas._testing as tm

deprecated_msg = "first is deprecated"


class TestFirst:
def test_first_subset(self, frame_or_series):
ts = tm.makeTimeDataFrame(freq="12h")
ts = tm.get_obj(ts, frame_or_series)
result = ts.first("10d")
assert len(result) == 20
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we perhaps just do something like:

        ts = tm.makeTimeDataFrame(freq="12h")
        ts = tm.get_obj(ts, frame_or_series)
        with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
            result = ts.first("10d")

        ts = tm.makeTimeDataFrame(freq="D")
        ts = tm.get_obj(ts, frame_or_series)
        with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
            result = ts.first("10d")

so that within each assert_produces_warning, there is just a single statement?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure 🫣

result = ts.first("10d")
assert len(result) == 20

ts = tm.makeTimeDataFrame(freq="D")
ts = tm.get_obj(ts, frame_or_series)
result = ts.first("10d")
assert len(result) == 10
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts.first("10d")
assert len(result) == 10

result = ts.first("3M")
expected = ts[:"3/31/2000"]
tm.assert_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts.first("3M")
expected = ts[:"3/31/2000"]
tm.assert_equal(result, expected)

result = ts.first("21D")
expected = ts[:21]
tm.assert_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts.first("21D")
expected = ts[:21]
tm.assert_equal(result, expected)

result = ts[:0].first("3M")
tm.assert_equal(result, ts[:0])
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts[:0].first("3M")
tm.assert_equal(result, ts[:0])

def test_first_last_raises(self, frame_or_series):
# GH#20725
obj = DataFrame([[1, 2, 3], [4, 5, 6]])
obj = tm.get_obj(obj, frame_or_series)

msg = "'first' only supports a DatetimeIndex index"
with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex
with tm.assert_produces_warning(
FutureWarning, match=deprecated_msg
), pytest.raises(
TypeError, match=msg
): # index is not a DatetimeIndex
obj.first("1D")

msg = "'last' only supports a DatetimeIndex index"
Expand Down Expand Up @@ -73,7 +84,8 @@ def test_last_subset(self, frame_or_series):
def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods):
# GH#29623
x = frame_or_series([1] * 100, index=bdate_range(start, periods=100))
result = x.first("1M")
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = x.first("1M")
expected = frame_or_series(
[1] * periods, index=bdate_range(start, periods=periods)
)
Expand All @@ -82,16 +94,20 @@ def test_first_with_first_day_last_of_month(self, frame_or_series, start, period
def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series):
# GH#29623
x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100))
result = x.first("2M")
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = x.first("2M")
expected = frame_or_series(
[1] * 23, index=bdate_range("2010-03-31", "2010-04-30")
)
tm.assert_equal(result, expected)

@pytest.mark.parametrize("func", ["first", "last"])
def test_empty_not_input(self, func):
def test_empty_not_input(self):
# GH#51032
df = DataFrame(index=pd.DatetimeIndex([]))
result = getattr(df, func)(offset=1)
result = df.last(offset=1)

with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = df.first(offset=1)

tm.assert_frame_equal(df, result)
assert df is not result
27 changes: 17 additions & 10 deletions pandas/tests/generic/test_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

import pandas as pd
import pandas._testing as tm

# TODO:
# * Binary methods (mul, div, etc.)
Expand Down Expand Up @@ -333,16 +334,6 @@
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
operator.methodcaller("between_time", "12:00", "13:00"),
),
(
pd.Series,
(1, pd.date_range("2000", periods=4)),
operator.methodcaller("first", "3D"),
),
(
pd.DataFrame,
({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
operator.methodcaller("first", "3D"),
),
(
pd.Series,
(1, pd.date_range("2000", periods=4)),
Expand Down Expand Up @@ -451,6 +442,22 @@ def test_finalize_called(ndframe_method):
assert result.attrs == {"a": 1}


@pytest.mark.parametrize(
"data",
[
pd.Series(1, pd.date_range("2000", periods=4)),
pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
],
)
def test_finalize_first(data):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! Good call to split this into a separate test.

deprecated_msg = "first is deprecated"

data.attrs = {"a": 1}
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = data.first("3D")
assert result.attrs == {"a": 1}


@not_implemented_mark
def test_finalize_called_eval_numexpr():
pytest.importorskip("numexpr")
Expand Down