Skip to content

DEPR: Series.first() and DataFrame.first() #53419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 1, 2023
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ Deprecations
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
-
- Deprecated :meth:`Series.first` and :meth:`DataFrame.first`, create a mask and filter using ``.loc`` instead (:issue:`45908`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.performance:
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9162,6 +9162,12 @@ def first(self, offset) -> Self:
3 days observed in the dataset, and therefore data for 2018-04-13 was
not returned.
"""
# Emit the deprecation notice before the index-type validation below, so a
# call that goes on to raise TypeError still produces the FutureWarning.
warnings.warn(
    # Adjacent string literals are concatenated verbatim, so the sentence
    # break has to be written explicitly at the end of the first fragment.
    "first is deprecated and will be removed in a future version. "
    "Please create a mask and filter using `.loc` instead",
    FutureWarning,
    stacklevel=find_stack_level(),
)
if not isinstance(self.index, DatetimeIndex):
raise TypeError("'first' only supports a DatetimeIndex index")

Expand Down
58 changes: 40 additions & 18 deletions pandas/tests/frame/methods/test_first_and_last.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,54 @@
)
import pandas._testing as tm

deprecated_msg = "first is deprecated"


class TestFirst:
def test_first_subset(self, frame_or_series):
ts = tm.makeTimeDataFrame(freq="12h")
ts = tm.get_obj(ts, frame_or_series)
result = ts.first("10d")
assert len(result) == 20
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we perhaps just do something like

        ts = tm.makeTimeDataFrame(freq="12h")
        ts = tm.get_obj(ts, frame_or_series)
        with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
            result = ts.first("10d")

        ts = tm.makeTimeDataFrame(freq="D")
        ts = tm.get_obj(ts, frame_or_series)
        with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
            result = ts.first("10d")

so that within each assert_produces_warning, there is just a single statement?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure 🫣

result = ts.first("10d")
assert len(result) == 20

ts = tm.makeTimeDataFrame(freq="D")
ts = tm.get_obj(ts, frame_or_series)
result = ts.first("10d")
assert len(result) == 10
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts.first("10d")
assert len(result) == 10

result = ts.first("3M")
expected = ts[:"3/31/2000"]
tm.assert_equal(result, expected)
ts = tm.makeTimeDataFrame(freq="D")
ts = tm.get_obj(ts, frame_or_series)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't look like this was in the original, could we keep it the same please (except for the with context)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry - copied/pasted more lines than needed. I wasn't adding more tests, but still an error.. :(

with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts.first("3M")
expected = ts[:"3/31/2000"]
tm.assert_equal(result, expected)

result = ts.first("21D")
expected = ts[:21]
tm.assert_equal(result, expected)
ts = tm.makeTimeDataFrame(freq="D")
ts = tm.get_obj(ts, frame_or_series)
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts.first("21D")
expected = ts[:21]
tm.assert_equal(result, expected)

result = ts[:0].first("3M")
tm.assert_equal(result, ts[:0])
ts = tm.makeTimeDataFrame(freq="D")
ts = tm.get_obj(ts, frame_or_series)
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = ts[:0].first("3M")
tm.assert_equal(result, ts[:0])

def test_first_last_raises(self, frame_or_series):
# GH#20725
obj = DataFrame([[1, 2, 3], [4, 5, 6]])
obj = tm.get_obj(obj, frame_or_series)

msg = "'first' only supports a DatetimeIndex index"
with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex
with tm.assert_produces_warning(
FutureWarning, match=deprecated_msg
), pytest.raises(
TypeError, match=msg
): # index is not a DatetimeIndex
obj.first("1D")

msg = "'last' only supports a DatetimeIndex index"
Expand Down Expand Up @@ -73,7 +90,8 @@ def test_last_subset(self, frame_or_series):
def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods):
# GH#29623
x = frame_or_series([1] * 100, index=bdate_range(start, periods=100))
result = x.first("1M")
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = x.first("1M")
expected = frame_or_series(
[1] * periods, index=bdate_range(start, periods=periods)
)
Expand All @@ -82,16 +100,20 @@ def test_first_with_first_day_last_of_month(self, frame_or_series, start, period
def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series):
# GH#29623
x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100))
result = x.first("2M")
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = x.first("2M")
expected = frame_or_series(
[1] * 23, index=bdate_range("2010-03-31", "2010-04-30")
)
tm.assert_equal(result, expected)

@pytest.mark.parametrize("func", ["first", "last"])
def test_empty_not_input(self, func):
def test_empty_not_input(self):
# GH#51032
df = DataFrame(index=pd.DatetimeIndex([]))
result = getattr(df, func)(offset=1)
result = df.last(offset=1)

with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = df.first(offset=1)

tm.assert_frame_equal(df, result)
assert df is not result