Skip to content

DEPR: Series.first() and DataFrame.first() #53419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 1, 2023
6 changes: 6 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9162,6 +9162,12 @@ def first(self, offset) -> Self:
3 days observed in the dataset, and therefore data for 2018-04-13 was
not returned.
"""
# GH45908 & GH#52487
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need the issue numbers here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks

warnings.warn(
"first is deprecated and will be removed in a future version",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shall we advise users what to do instead? something like "please create a mask and filter using .loc instead"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

FutureWarning,
stacklevel=find_stack_level(),
)
if not isinstance(self.index, DatetimeIndex):
raise TypeError("'first' only supports a DatetimeIndex index")

Expand Down
70 changes: 40 additions & 30 deletions pandas/tests/frame/methods/test_first_and_last.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,43 @@
)
import pandas._testing as tm

deprecated_msg = "first is deprecated"


class TestFirst:
def test_first_subset(self, frame_or_series):
ts = tm.makeTimeDataFrame(freq="12h")
ts = tm.get_obj(ts, frame_or_series)
result = ts.first("10d")
assert len(result) == 20
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we perhaps just do something like

        ts = tm.makeTimeDataFrame(freq="12h")
        ts = tm.get_obj(ts, frame_or_series)
        with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
            result = ts.first("10d")

        ts = tm.makeTimeDataFrame(freq="D")
        ts = tm.get_obj(ts, frame_or_series)
        with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
            result = ts.first("10d")

so that within each assert_produces_warning, there is just a single statement?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure 🫣

result = ts.first("10d")

ts = tm.makeTimeDataFrame(freq="D")
ts = tm.get_obj(ts, frame_or_series)
result = ts.first("10d")
assert len(result) == 10
assert len(result) == 20

result = ts.first("3M")
expected = ts[:"3/31/2000"]
tm.assert_equal(result, expected)
ts = tm.makeTimeDataFrame(freq="D")
ts = tm.get_obj(ts, frame_or_series)
result = ts.first("10d")
assert len(result) == 10

result = ts.first("21D")
expected = ts[:21]
tm.assert_equal(result, expected)
result = ts.first("3M")
expected = ts[:"3/31/2000"]
tm.assert_equal(result, expected)

result = ts[:0].first("3M")
tm.assert_equal(result, ts[:0])
result = ts.first("21D")
expected = ts[:21]
tm.assert_equal(result, expected)

result = ts[:0].first("3M")
tm.assert_equal(result, ts[:0])

def test_first_last_raises(self, frame_or_series):
# GH#20725
obj = DataFrame([[1, 2, 3], [4, 5, 6]])
obj = tm.get_obj(obj, frame_or_series)

msg = "'first' only supports a DatetimeIndex index"
with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex
obj.first("1D")
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex
obj.first("1D")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is fine, though an alternative would be

with (
    tm.assert_produces_warning(FutureWarning, match=deprecated_msg),
    pytest.raises(TypeError, match=msg)
):
    obj.first('1D')

There's a ruff check for this https://beta.ruff.rs/docs/rules/multiple-with-statements/ , in case you wanted to make a separate issue about it and we can enable it globally in a separate PR

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened a new issue.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Imo this looks very ugly as soon as one of both statements gets longer. I would prefer keeping them separate for tests at least

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@phofl Separate like this you mean?:

with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
            with pytest.raises(TypeError, match=msg):  # index is not a DatetimeIndex
                obj.first("1D")


msg = "'last' only supports a DatetimeIndex index"
with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex
Expand Down Expand Up @@ -73,25 +78,30 @@ def test_last_subset(self, frame_or_series):
def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods):
# GH#29623
x = frame_or_series([1] * 100, index=bdate_range(start, periods=100))
result = x.first("1M")
expected = frame_or_series(
[1] * periods, index=bdate_range(start, periods=periods)
)
tm.assert_equal(result, expected)
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = x.first("1M")
expected = frame_or_series(
[1] * periods, index=bdate_range(start, periods=periods)
)
tm.assert_equal(result, expected)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's take this out of the with context


def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series):
# GH#29623
x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100))
result = x.first("2M")
expected = frame_or_series(
[1] * 23, index=bdate_range("2010-03-31", "2010-04-30")
)
tm.assert_equal(result, expected)

@pytest.mark.parametrize("func", ["first", "last"])
def test_empty_not_input(self, func):
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = x.first("2M")
expected = frame_or_series(
[1] * 23, index=bdate_range("2010-03-31", "2010-04-30")
)
tm.assert_equal(result, expected)

def test_empty_not_input(self):
# GH#51032
df = DataFrame(index=pd.DatetimeIndex([]))
result = getattr(df, func)(offset=1)
result = getattr(df, "last")(offset=1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we're doing getattr with a literal, we may as well just write df.last

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks


with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
result = getattr(df, "first")(offset=1)

tm.assert_frame_equal(df, result)
assert df is not result