From bab1995c0b73c868f210a3725603583bf7253a00 Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Mon, 8 Aug 2022 10:50:05 -0700 Subject: [PATCH 1/9] BUG: Fixed ignoring of nanoseconds when adding to series #47856 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/_libs/tslibs/offsets.pyx | 8 +++--- pandas/tests/tseries/offsets/test_offsets.py | 26 +++++++++++++++++++- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 0b450fab53137..b93b3bd29442b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -824,6 +824,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) +- Bug in :class:`RelativeDeltaOffset` which caused adding a ``DateOffset`` to a ``Series`` to not add the ``nanoseconds`` field (:issue:`47856`) - Timedelta diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 81b59db6f0e18..460c615d265bc 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -297,8 +297,8 @@ _relativedelta_kwds = {"years", "months", "weeks", "days", "year", "month", cdef _determine_offset(kwds): # timedelta is used for sub-daily plural offsets and all singular - # offsets relativedelta is used for plural offsets of daily length or - # more nanosecond(s) are handled by apply_wraps + # offsets, relativedelta is used for plural offsets of daily length or + # more, nanosecond(s) are handled by apply_wraps kwds_no_nanos = dict( (k, v) for k, v in kwds.items() if k not in ('nanosecond', 'nanoseconds') @@ -1157,7 +1157,9 @@ cdef class RelativeDeltaOffset(BaseOffset): return dt64other elif not self._use_relativedelta and hasattr(self, "_offset"): # timedelta - delta = Timedelta(self._offset * self.n) + num_nano = getattr(self, "nanoseconds", 0) + rem_nano = Timedelta(nanoseconds=num_nano) + delta = Timedelta((self._offset + rem_nano) * self.n) td = (<_Timedelta>delta)._as_reso(reso) return dt64other + td else: diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 49661fe1ec8ce..ebdf89de56930 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -33,6 +33,7 @@ from pandas import ( DatetimeIndex, + Series, date_range, ) import pandas._testing as tm @@ -987,7 +988,7 @@ def test_dateoffset_add_sub(offset_kwargs, expected_arg): assert result == expected -def test_dataoffset_add_sub_timestamp_with_nano(): +def test_dateoffset_add_sub_timestamp_with_nano(): offset = DateOffset(minutes=2, nanoseconds=9) ts = Timestamp(4) result = ts + offset @@ -1032,3 +1033,26 @@ def test_construct_int_arg_no_kwargs_assumed_days(n): result = Timestamp(2022, 1, 2) + offset expected = Timestamp(2022, 1, 2 + n) assert result == expected + + +@pytest.mark.parametrize( + "offset, expected", + [ + ( + DateOffset(minutes=7, nanoseconds=18), + Timestamp("2022-01-01 00:07:00.000000018"), + ), + (DateOffset(nanoseconds=3), Timestamp("2022-01-01 00:00:00.000000003")), + ], +) +def test_dateoffset_add_sub_timestamp_series_with_nano(offset, expected): + # GH 47856 + t = Timestamp("2022-01-01") + teststamp = t + s = Series([t]) + s = s + offset + assert s[0] == expected + s -= offset + assert s[0] == teststamp + s = offset + s + assert s[0] == expected From c62dde39b08699605680a71551040ba0dfdeab18 Mon Sep 17 00:00:00 2001 From: srotondo <97266896+srotondo@users.noreply.github.com> Date: Mon, 8 Aug 2022 14:14:46 -0700 Subject: [PATCH 2/9] Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Matthew Roeschke --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b93b3bd29442b..aece8caef65f1 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -824,7 +824,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) -- Bug in :class:`RelativeDeltaOffset` which caused adding a ``DateOffset`` to a ``Series`` to not add the ``nanoseconds`` field (:issue:`47856`) +- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) - Timedelta From ee8752f8ee1cd2180b7532d4b059101246f9bbf0 Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Mon, 8 Aug 2022 14:20:05 -0700 Subject: [PATCH 3/9] BUG: Renamed test variables #47856 --- pandas/tests/tseries/offsets/test_offsets.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index ebdf89de56930..bca4ba98f37b7 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1047,12 +1047,12 @@ def test_construct_int_arg_no_kwargs_assumed_days(n): ) def test_dateoffset_add_sub_timestamp_series_with_nano(offset, expected): # GH 47856 - t = Timestamp("2022-01-01") - teststamp = t - s = Series([t]) - s = s + offset - assert s[0] == expected - s -= offset - assert s[0] == teststamp - s = offset + s - assert s[0] == expected + start_time = Timestamp("2022-01-01") + teststamp = start_time + testseries = Series([start_time]) + testseries = testseries + offset + assert testseries[0] == expected + testseries -= offset + assert testseries[0] == teststamp + testseries = offset + testseries + assert testseries[0] == expected From e91a1d85d4d5700e33640f189a9c3935e3b79d7c Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Wed, 17 Aug 2022 10:34:50 -0700 Subject: [PATCH 4/9] BUG: Fixed inconsistent offset behavior for series #43784 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/arrays/datetimes.py | 10 +++++++-- pandas/tests/tseries/offsets/test_dst.py | 26 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index aece8caef65f1..63093c4609ef0 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -825,6 +825,7 @@ Datetimelike - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) - Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) +- Bug where adding a :class:`DateOffset` to a :class:`DatetimeIndex` or :class:`Series` over a Daylight Savings Time boundary would produce an incorrect result (:issue:`43784`) - Timedelta diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c9f5946c30c8c..fc75bf96e8a4a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -687,7 +687,10 @@ def _add_offset(self, offset) -> DatetimeArray: assert not isinstance(offset, Tick) if self.tz is not None: - values = self.tz_localize(None) + if not offset._use_relativedelta: + values = self.tz_convert("utc").tz_localize(None) + else: + values = self.tz_localize(None) else: values = self @@ -708,7 +711,10 @@ def _add_offset(self, offset) -> DatetimeArray: result = DatetimeArray._simple_new(result, dtype=result.dtype) if self.tz is not None: # FIXME: tz_localize with non-nano - result = result.tz_localize(self.tz) + if not offset._use_relativedelta: + result = result.tz_localize("utc").tz_convert(self.tz) + else: + result = result.tz_localize(self.tz) return result diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index 50c5a91fc2390..db837c1c09eee 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -30,6 +30,8 @@ YearEnd, ) +import pandas as pd +import pandas._testing as tm from pandas.tests.tseries.offsets.test_offsets import get_utc_offset_hours @@ -225,3 +227,27 @@ def test_nontick_offset_with_ambiguous_time_error(original_dt, target_dt, offset msg = f"Cannot infer dst time from {target_dt}, try using the 'ambiguous' argument" with pytest.raises(pytz.AmbiguousTimeError, match=msg): localized_dt + offset + + +def test_series_dst_addition(): + # GH#43784 + startdates = pd.Series( + [ + Timestamp("2020-10-25", tz="Europe/Berlin"), + Timestamp("2017-03-12", tz="US/Pacific"), + ] + ) + offset1 = DateOffset(hours=3) + offset2 = DateOffset(days=1) + + expected1 = pd.Series( + [Timestamp("2020-10-25 02:00:00+01:00"), Timestamp("2017-03-12 04:00:00-07:00")] + ) + + expected2 = pd.Series( + [Timestamp("2020-10-26 00:00:00+01:00"), Timestamp("2017-03-13 00:00:00-07:00")] + ) + + tm.assert_series_equal((startdates + offset1), expected1) + + tm.assert_series_equal((startdates + offset2), expected2) From c43f2be16ede2660da28f2b49849abc2ef630e2a Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Mon, 22 Aug 2022 08:59:19 -0700 Subject: [PATCH 5/9] BUG: Fixed code change #43784 --- pandas/core/arrays/datetimes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fc75bf96e8a4a..fc97b2d28ed0c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -39,6 +39,7 @@ tz_convert_from_utc, tzconversion, ) +from pandas._libs.tslibs.offsets import DateOffset from pandas._typing import npt from pandas.errors import ( OutOfBoundsDatetime, @@ -687,7 +688,7 @@ def _add_offset(self, offset) -> DatetimeArray: assert not isinstance(offset, Tick) if self.tz is not None: - if not offset._use_relativedelta: + if not offset._use_relativedelta and type(offset) == DateOffset: values = self.tz_convert("utc").tz_localize(None) else: values = self.tz_localize(None) @@ -711,7 +712,7 @@ def _add_offset(self, offset) -> DatetimeArray: result = DatetimeArray._simple_new(result, dtype=result.dtype) if self.tz is not None: # FIXME: tz_localize with non-nano - if not offset._use_relativedelta: + if not offset._use_relativedelta and type(offset) == DateOffset: result = result.tz_localize("utc").tz_convert(self.tz) else: result = result.tz_localize(self.tz) From 71ef3d2d48a457c0bf4974117b60b2d5fb6fcc8a Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Tue, 13 Sep 2022 10:26:28 -0700 Subject: [PATCH 6/9] BUG: Moved release note and changed test #43784 --- doc/source/whatsnew/v1.5.0.rst | 1 - doc/source/whatsnew/v1.6.0.rst | 1 + pandas/tests/tseries/offsets/test_dst.py | 7 +++++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7512b6d4a56f6..d81562e45f223 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -995,7 +995,6 @@ Datetimelike - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) - Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) -- Bug where adding a :class:`DateOffset` to a :class:`DatetimeIndex` or :class:`Series` over a Daylight Savings Time boundary would produce an incorrect result (:issue:`43784`) - Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`) - diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 07d406ae7d779..fce92f2aabc8f 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -127,6 +127,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :func:`pandas.infer_freq`, raising ``TypeError`` when inferred on :class:`RangeIndex` (:issue:`47084`) +- Bug where adding a :class:`DateOffset` to a :class:`DatetimeIndex` or :class:`Series` over a Daylight Savings Time boundary would produce an incorrect result (:issue:`43784`) - Timedelta diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index 00775913b22af..9e61a119994ab 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -251,6 +251,9 @@ def test_series_dst_addition(): [Timestamp("2020-10-26 00:00:00+01:00"), Timestamp("2017-03-13 00:00:00-07:00")] ) - tm.assert_series_equal((startdates + offset1), expected1) + result1 = startdates + offset1 + result2 = startdates + offset2 - tm.assert_series_equal((startdates + offset2), expected2) + tm.assert_series_equal(result1, expected1) + + tm.assert_series_equal(result2, expected2) From b9a54799b7733d4ae4c4ece2a7b6451c874ed89d Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Wed, 14 Sep 2022 10:37:06 -0700 Subject: [PATCH 7/9] BUG: Fixed accidental reordering #43784 --- doc/source/whatsnew/v1.5.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d81562e45f223..1243b5c0596ad 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -994,8 +994,9 @@ Datetimelike - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) -- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) - Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`) +- Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) +- Bug where adding a :class:`DateOffset` to a :class:`DatetimeIndex` or :class:`Series` over a Daylight Savings Time boundary would produce an incorrect result (:issue:`43784`) - Timedelta From 7672f40ad9b46bb7be4d00aa02df0dc41f62d55b Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Wed, 14 Sep 2022 10:39:07 -0700 Subject: [PATCH 8/9] BUG: Unintentional change #43784 --- doc/source/whatsnew/v1.5.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1243b5c0596ad..d8a319da2065e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -996,7 +996,6 @@ Datetimelike - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) - Bug in :func:`to_datetime` where ``OutOfBoundsDatetime`` would be thrown even if ``errors=coerce`` if there were more than 50 rows (:issue:`45319`) - Bug when adding a :class:`DateOffset` to a :class:`Series` would not add the ``nanoseconds`` field (:issue:`47856`) -- Bug where adding a :class:`DateOffset` to a :class:`DatetimeIndex` or :class:`Series` over a Daylight Savings Time boundary would produce an incorrect result (:issue:`43784`) - Timedelta From 073b187eb222c0e6e4792073982b3e2e04cfa42f Mon Sep 17 00:00:00 2001 From: Steven Rotondo Date: Fri, 16 Sep 2022 17:56:16 -0700 Subject: [PATCH 9/9] BUG: Changed if statement #43784 --- pandas/_libs/tslibs/offsets.pyx | 11 +++++++++++ pandas/core/arrays/datetimes.py | 5 ++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 7be7381bcb4d1..6d6999d11ed81 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1148,6 +1148,7 @@ cdef class Day(Tick): _td64_unit = "D" _period_dtype_code = PeriodDtypeCode.D _reso = NPY_DATETIMEUNIT.NPY_FR_D + _use_relativedelta = True cdef class Hour(Tick): @@ -1495,6 +1496,7 @@ cdef class BusinessMixin(SingleConstructorOffset): """ Mixin to business types to provide related functions. """ + _use_relativedelta = True cdef readonly: timedelta _offset @@ -2068,6 +2070,7 @@ cdef class WeekOfMonthMixin(SingleConstructorOffset): """ Mixin for methods common to WeekOfMonth and LastWeekOfMonth. """ + _use_relativedelta = True cdef readonly: int weekday, week @@ -2112,6 +2115,7 @@ cdef class YearOffset(SingleConstructorOffset): DateOffset that just needs a month. """ _attributes = tuple(["n", "normalize", "month"]) + _use_relativedelta = True # FIXME(cython#4446): python annotation here gives compile-time errors # _default_month: int @@ -2277,6 +2281,7 @@ cdef class QuarterOffset(SingleConstructorOffset): # FIXME(cython#4446): python annotation here gives compile-time errors # _default_starting_month: int # _from_name_starting_month: int + _use_relativedelta = True cdef readonly: int startingMonth @@ -2448,6 +2453,8 @@ cdef class QuarterBegin(QuarterOffset): # Month-Based Offset Classes cdef class MonthOffset(SingleConstructorOffset): + _use_relativedelta = True + def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): return False @@ -2548,6 +2555,7 @@ cdef class SemiMonthOffset(SingleConstructorOffset): _default_day_of_month = 15 _min_day_of_month = 2 _attributes = tuple(["n", "normalize", "day_of_month"]) + _use_relativedelta = True cdef readonly: int day_of_month @@ -2750,6 +2758,7 @@ cdef class Week(SingleConstructorOffset): _inc = timedelta(weeks=1) _prefix = "W" _attributes = tuple(["n", "normalize", "weekday"]) + _use_relativedelta = True cdef readonly: object weekday # int or None @@ -3027,6 +3036,7 @@ cdef class LastWeekOfMonth(WeekOfMonthMixin): # Special Offset Classes cdef class FY5253Mixin(SingleConstructorOffset): + _use_relativedelta = True cdef readonly: int startingMonth int weekday @@ -3496,6 +3506,7 @@ cdef class Easter(SingleConstructorOffset): >>> ts + pd.offsets.Easter() Timestamp('2022-04-17 00:00:00') """ + _use_relativedelta = True cpdef __setstate__(self, state): self.n = state.pop("n") diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f2c5d2a9e4db5..5d6941ade2849 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -41,7 +41,6 @@ tz_convert_from_utc, tzconversion, ) -from pandas._libs.tslibs.offsets import DateOffset from pandas._typing import npt from pandas.errors import ( OutOfBoundsDatetime, @@ -695,7 +694,7 @@ def _add_offset(self, offset) -> DatetimeArray: assert not isinstance(offset, Tick) if self.tz is not None: - if not offset._use_relativedelta and type(offset) == DateOffset: + if not offset._use_relativedelta: values = self.tz_convert("utc").tz_localize(None) else: values = self.tz_localize(None) @@ -720,7 +719,7 @@ def _add_offset(self, offset) -> DatetimeArray: result = DatetimeArray._simple_new(result, dtype=result.dtype) if self.tz is not None: # FIXME: tz_localize with non-nano - if not offset._use_relativedelta and type(offset) == DateOffset: + if not offset._use_relativedelta: result = result.tz_localize("utc").tz_convert(self.tz) else: result = result.tz_localize(self.tz)