diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst
index 2e8e2345d4c0a..6a333f00a63ba 100644
--- a/doc/source/whatsnew/v2.0.1.rst
+++ b/doc/source/whatsnew/v2.0.1.rst
@@ -38,6 +38,7 @@ Bug fixes
 - Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)
 - Bug in logical and comparison operations between :class:`ArrowDtype` and numpy masked types (e.g. ``"boolean"``) (:issue:`52625`)
 - Fixed bug in :func:`merge` when merging with ``ArrowDtype`` one one and a NumPy dtype on the other side (:issue:`52406`)
+- Fixed bug in :meth:`DataFrame.first` when used with a :class:`DateOffset` (:issue:`45908`)
 - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index bf8cf831b942a..fb2479b2d58a8 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -41,6 +41,7 @@
     Period,
     Tick,
     Timestamp,
+    offsets,
     to_offset,
 )
 from pandas._typing import (
@@ -9091,8 +9092,17 @@ def first(self, offset) -> Self:
         if len(self.index) == 0:
             return self.copy(deep=False)
 
-        offset = to_offset(offset)
-        if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]):
+        if isinstance(offset, offsets.DateOffset):
+            input_is_offset = True
+        else:
+            input_is_offset = False
+            offset = to_offset(offset)
+
+        if (
+            not isinstance(offset, Tick)
+            and offset.is_on_offset(self.index[0])
+            and not input_is_offset
+        ):
             # GH#29623 if first value is end of period, remove offset with n = 1
             # before adding the real offset
             end_date = end = self.index[0] - offset.base + offset
@@ -9100,7 +9110,9 @@ def first(self, offset) -> Self:
             end_date = end = self.index[0] + offset
 
         # Tick-like, e.g. 3 weeks
-        if isinstance(offset, Tick) and end_date in self.index:
+        # ``and`` binds tighter than ``or``: parenthesize so the membership
+        # test guards both the Tick and the explicit-DateOffset case.
+        if (isinstance(offset, Tick) or input_is_offset) and end_date in self.index:
             end = self.index.searchsorted(end_date, side="left")
             return self.iloc[:end]
 
diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py
index 64f6665ecd709..4e285832b6b8b 100644
--- a/pandas/tests/frame/methods/test_first_and_last.py
+++ b/pandas/tests/frame/methods/test_first_and_last.py
@@ -1,6 +1,7 @@
 """
 Note: includes tests for `last`
 """
+import numpy as np
 import pytest
 
 import pandas as pd
@@ -95,3 +96,53 @@ def test_empty_not_input(self, func):
         result = getattr(df, func)(offset=1)
         tm.assert_frame_equal(df, result)
         assert df is not result
+
+    @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)])
+    def test_last_day_of_months_with_date_offset(self, frame_or_series, start, periods):
+        # DateOffset(days=n) starting at/near a month end keeps exactly n daily rows
+        x = frame_or_series([1] * 100, index=pd.date_range(start, periods=100))
+        result = x.first(pd.DateOffset(days=periods))
+        expected = frame_or_series(
+            [1] * periods, index=pd.date_range(start, periods=periods)
+        )
+        tm.assert_equal(result, expected)
+
+    def test_date_offset_multiple_days(self, frame_or_series):
+        # the end date (start + 2 days) is in the index and must be excluded
+        x = frame_or_series([1] * 100, index=pd.date_range("2010-03-31", periods=100))
+        result = x.first(pd.DateOffset(days=2))
+        expected = frame_or_series(
+            [1] * 2, index=pd.date_range("2010-03-31", "2010-04-01")
+        )
+        tm.assert_equal(result, expected)
+
+    def test_first_with_date_offset(self):
+        # GH#51284
+        i = pd.to_datetime(["2018-04-09", "2018-04-10", "2018-04-11", "2018-04-12"])
+        x = DataFrame({"A": [1, 2, 3, 4]}, index=i)
+        result = x.first(pd.DateOffset(days=2))
+        expected = DataFrame(
+            {"A": [1, 2]}, index=pd.to_datetime(["2018-04-09", "2018-04-10"])
+        )
+        tm.assert_equal(result, expected)
+
+    def test_date_offset_15_days(self):
+        # GH#45908
+        i = pd.date_range("2018-04-09", periods=30, freq="2D")
+        x = DataFrame({"A": np.arange(30)}, index=i)
+        result = x.first(pd.DateOffset(days=15))
+        i2 = pd.date_range("2018-04-09", periods=8, freq="2D")
+        expected = DataFrame({"A": np.arange(8)}, index=i2)
+        tm.assert_equal(result, expected)
+
+    def test_first_with_date_offset_months(self, frame_or_series):
+        # 2010-03-31 + 1 month is 2010-04-30, which is excluded: 30 daily rows remain
+        periods = 40
+        x = frame_or_series(
+            [1] * periods, index=pd.date_range("2010-03-31", periods=periods)
+        )
+        result = x.first(pd.DateOffset(months=1))
+        expected = frame_or_series(
+            [1] * 30, index=pd.date_range("2010-03-31", periods=30)
+        )
+        tm.assert_equal(result, expected)