From 83670886c112e8c2b24ba257ee1208ff677383a9 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 22 Nov 2020 13:39:21 +0100 Subject: [PATCH 1/5] Bug in first with offset --- pandas/core/generic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3aa692c5d3d43..c6e5ab05c6379 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8395,7 +8395,10 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: return self offset = to_offset(offset) - end_date = end = self.index[0] + offset + if offset.is_on_offset(self.index[0]): + end_date = end = self.index[0] + else: + end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks if isinstance(offset, Tick): From 2b691040dcdcf84e8239f67c5a356e6ab3eb46c7 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 6 Dec 2020 18:56:28 +0100 Subject: [PATCH 2/5] Add tests --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/tests/frame/methods/test_first_and_last.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 04db52c5bfa13..76104bd15329f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -538,6 +538,7 @@ Datetimelike - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) - Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`) +- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`) Timedelta ^^^^^^^^^ diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index d21e1eee54e16..4ee7016004137 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ 
b/pandas/tests/frame/methods/test_first_and_last.py @@ -3,7 +3,7 @@ """ import pytest -from pandas import DataFrame +from pandas import DataFrame, bdate_range import pandas._testing as tm @@ -69,3 +69,13 @@ def test_last_subset(self, frame_or_series): result = ts[:0].last("3M") tm.assert_equal(result, ts[:0]) + + @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) + def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods): + # GH#29623 + x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) + result = x.first("1M") + expected = frame_or_series( + [1] * periods, index=bdate_range(start, periods=periods) + ) + tm.assert_equal(result, expected) From ba727d25095e8d66e7f94fa6e000d21e1b901813 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 6 Dec 2020 20:14:09 +0100 Subject: [PATCH 3/5] Improve if condition --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6be8e4461a79d..ff5645072699e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8426,7 +8426,7 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: return self offset = to_offset(offset) - if offset.is_on_offset(self.index[0]): + if offset._day_opt == "end" and offset.is_on_offset(self.index[0]): end_date = end = self.index[0] else: end_date = end = self.index[0] + offset From 379e2d29980a5ba28812185b07d5b5aebd4ba636 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 11 Dec 2020 18:30:34 +0100 Subject: [PATCH 4/5] Use tick --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ff5645072699e..9d375455dd237 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8426,7 +8426,7 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: return self offset = to_offset(offset) - if offset._day_opt == "end" and 
offset.is_on_offset(self.index[0]): + if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): end_date = end = self.index[0] else: end_date = end = self.index[0] + offset From 1da3f6e3da9efa5cfacf342575804d769ab4b675 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 12 Dec 2020 01:57:42 +0100 Subject: [PATCH 5/5] Add gh reference --- pandas/core/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9d375455dd237..26a09058c1c52 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8427,6 +8427,7 @@ def first(self: FrameOrSeries, offset) -> FrameOrSeries: offset = to_offset(offset) if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + # GH#29623 if first value is end of period end_date = end = self.index[0] else: end_date = end = self.index[0] + offset