From edb505e37050b7265b0c487e566dc257b56f109e Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Wed, 14 Jun 2023 15:46:27 +0000 Subject: [PATCH 1/9] BUG: Fixed inconsistent multiplication #47953 --- doc/source/whatsnew/v2.0.3.rst | 4 ++ pandas/_libs/tslibs/offsets.pyx | 13 +++--- pandas/tests/tseries/offsets/test_offsets.py | 42 ++++++++++++++++++++ 3 files changed, 52 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 3da469c2e1fe6..6592c210f362e 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -21,7 +21,11 @@ Fixed regressions Bug fixes ~~~~~~~~~ +<<<<<<< Updated upstream - Bug in :func:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`) +======= +- Bug in :class:`RelativeDeltaOffset` which caused inconsistent behavior upon multiplying a :class:`DateOffset` (:issue:`47953`) +>>>>>>> Stashed changes - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) - Bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` with ``expand=True`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`53532`) - diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 0248214d4e0a8..107b02d59295d 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1221,15 +1221,14 @@ cdef class RelativeDeltaOffset(BaseOffset): # perform calculation in UTC other = other.replace(tzinfo=None) - if self.n > 0: - for i in range(self.n): - other = other + self._offset + if hasattr(self, "nanoseconds"): + td_nano = Timedelta(nanoseconds=self.nanoseconds) + other = self.n * td_nano + other else: - for i in range(-self.n): - other = other - self._offset + td_nano = Timedelta(0) + + other = other + ((self.offset + td_nano) * self.n) - if hasattr(self, "nanoseconds"): - other = self.n * 
Timedelta(nanoseconds=self.nanoseconds) + other if other_nanos != 0: other = Timedelta(nanoseconds=other_nanos) + other diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index bfc5139c78b91..78f59fda9cfeb 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -32,6 +32,7 @@ from pandas.errors import PerformanceWarning from pandas import ( + DataFrame, DatetimeIndex, Series, date_range, @@ -1075,3 +1076,44 @@ def test_dateoffset_add_sub_timestamp_series_with_nano(offset, expected): assert testseries[0] == teststamp testseries = offset + testseries assert testseries[0] == expected + + +@pytest.mark.parametrize( + "month_value, scaling_factor, start_timestamp, expected_timestamp", + [(1, 2, "2020-01-30", "2020-03-30"), (2, 1, "2020-01-30", "2020-03-30")], +) +def test_offset_multiplication( + month_value, scaling_factor, start_timestamp, expected_timestamp +): + # GH 47953 + mo1 = DateOffset(months=month_value) + + startscalar = Timestamp(start_timestamp) + startarray = Series([startscalar]) + + resultscalar = startscalar + (mo1 * scaling_factor) + resultarray = startarray + (mo1 * scaling_factor) + + expectedscalar = Timestamp(expected_timestamp) + expectedarray = Series([expectedscalar]) + assert resultscalar == expectedscalar + + tm.assert_series_equal(resultarray, expectedarray) + + +def test_dateoffset_operations_on_dataframes(): + # GH 47953 + df = DataFrame({"T": [Timestamp("2019-04-30")], "D": [DateOffset(months=1)]}) + frameresult1 = df["T"] + 26 * df["D"] + df2 = DataFrame( + { + "T": [Timestamp("2019-04-30"), Timestamp("2019-04-30")], + "D": [DateOffset(months=1), DateOffset(months=1)], + } + ) + expecteddate = Timestamp("2021-06-30") + with tm.assert_produces_warning(PerformanceWarning): + frameresult2 = df2["T"] + 26 * df2["D"] + + assert frameresult1[0] == expecteddate + assert frameresult2[0] == expecteddate From 
4161627a0d8953f72506b621730a7619ec777c85 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Thu, 15 Jun 2023 06:35:27 +0000 Subject: [PATCH 2/9] Fixed release note --- doc/source/whatsnew/v2.0.3.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 6592c210f362e..35be69447bb2b 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -21,11 +21,8 @@ Fixed regressions Bug fixes ~~~~~~~~~ -<<<<<<< Updated upstream -- Bug in :func:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`) -======= - Bug in :class:`RelativeDeltaOffset` which caused inconsistent behavior upon multiplying a :class:`DateOffset` (:issue:`47953`) ->>>>>>> Stashed changes +- Bug in :func:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`) - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) - Bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` with ``expand=True`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`53532`) - From 79258ceb327f6334b43b03992a09606156ac36f8 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Thu, 15 Jun 2023 07:36:40 +0000 Subject: [PATCH 3/9] Fixed attribute name --- pandas/_libs/tslibs/offsets.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 107b02d59295d..7a9c5fe1977ff 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1227,7 +1227,7 @@ cdef class RelativeDeltaOffset(BaseOffset): else: td_nano = Timedelta(0) - other = other + ((self.offset + td_nano) * self.n) + other = other + ((self._offset + td_nano) * self.n) if other_nanos != 0: other = Timedelta(nanoseconds=other_nanos) + other From 78479a7574f95f4e8be8b10f37adebce99fa94b6 Mon Sep 17 
00:00:00 2001 From: RajatS Mukherjee Date: Thu, 15 Jun 2023 14:49:45 +0000 Subject: [PATCH 4/9] Changed offset apply logic --- pandas/_libs/tslibs/offsets.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 7a9c5fe1977ff..6cd9d0aef75bc 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1223,9 +1223,8 @@ cdef class RelativeDeltaOffset(BaseOffset): if hasattr(self, "nanoseconds"): td_nano = Timedelta(nanoseconds=self.nanoseconds) - other = self.n * td_nano + other else: - td_nano = Timedelta(0) + td_nano = Timedelta(other_nanos) other = other + ((self._offset + td_nano) * self.n) From 8b70342a732547d98574d0b984aaaf302941e826 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Thu, 15 Jun 2023 16:37:20 +0000 Subject: [PATCH 5/9] Addressed #46877 re-occurrence --- pandas/_libs/tslibs/offsets.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 6cd9d0aef75bc..f81e969b14c9c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1221,12 +1221,12 @@ cdef class RelativeDeltaOffset(BaseOffset): # perform calculation in UTC other = other.replace(tzinfo=None) - if hasattr(self, "nanoseconds"): - td_nano = Timedelta(nanoseconds=self.nanoseconds) - else: - td_nano = Timedelta(other_nanos) + other = other + (self._offset * self.n) - other = other + ((self._offset + td_nano) * self.n) + if hasattr(self, "nanoseconds"): + other = self.n * Timedelta(nanoseconds=self.nanoseconds) + other + if other_nanos != 0: + other = Timedelta(nanoseconds=other_nanos) + other if other_nanos != 0: other = Timedelta(nanoseconds=other_nanos) + other From 5c91caaf379a97de20d48bfdc59e5df0d8b08bb1 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Thu, 15 Jun 2023 17:23:44 +0000 Subject: [PATCH 6/9] Removed duplicate code --- 
pandas/_libs/tslibs/offsets.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index f81e969b14c9c..84b102bd4a262 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1228,9 +1228,6 @@ cdef class RelativeDeltaOffset(BaseOffset): if other_nanos != 0: other = Timedelta(nanoseconds=other_nanos) + other - if other_nanos != 0: - other = Timedelta(nanoseconds=other_nanos) + other - if tzinfo is not None and self._use_relativedelta: # bring tz back from UTC calculation other = localize_pydatetime(other, tzinfo) From 62c1b070b44c68b8eb3a9c10bc05f8c34a9425fd Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Thu, 15 Jun 2023 21:13:22 +0000 Subject: [PATCH 7/9] Addressed comments --- doc/source/whatsnew/v2.0.3.rst | 2 +- pandas/tests/tseries/offsets/test_offsets.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 35be69447bb2b..80975b30e498d 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- Bug in :class:`RelativeDeltaOffset` which caused inconsistent behavior upon multiplying a :class:`DateOffset` (:issue:`47953`) +- Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`) - Bug in :func:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`) - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`) - Bug in :meth:`Series.str.split` and :meth:`Series.str.rsplit` with ``expand=True`` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`53532`) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 78f59fda9cfeb..5827ee4f394d6 100644 --- 
a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1079,14 +1079,14 @@ def test_dateoffset_add_sub_timestamp_series_with_nano(offset, expected): @pytest.mark.parametrize( - "month_value, scaling_factor, start_timestamp, expected_timestamp", + "n_months, scaling_factor, start_timestamp, expected_timestamp", [(1, 2, "2020-01-30", "2020-03-30"), (2, 1, "2020-01-30", "2020-03-30")], ) def test_offset_multiplication( - month_value, scaling_factor, start_timestamp, expected_timestamp + n_months, scaling_factor, start_timestamp, expected_timestamp ): # GH 47953 - mo1 = DateOffset(months=month_value) + mo1 = DateOffset(months=n_months) startscalar = Timestamp(start_timestamp) startarray = Series([startscalar]) From dcb424425ceab339153b38c47eac7c16c571c2e0 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Fri, 16 Jun 2023 07:43:19 +0000 Subject: [PATCH 8/9] Added unit test cases --- pandas/tests/tseries/offsets/test_offsets.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 5827ee4f394d6..15019204fab78 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1080,7 +1080,14 @@ def test_dateoffset_add_sub_timestamp_series_with_nano(offset, expected): @pytest.mark.parametrize( "n_months, scaling_factor, start_timestamp, expected_timestamp", - [(1, 2, "2020-01-30", "2020-03-30"), (2, 1, "2020-01-30", "2020-03-30")], + [ + (1, 2, "2020-01-30", "2020-03-30"), + (2, 1, "2020-01-30", "2020-03-30"), + (1, 0, "2020-01-30", "2020-01-30"), + (2, 0, "2020-01-30", "2020-01-30"), + (1, -1, "2020-01-30", "2019-12-30"), + (2, -1, "2020-01-30", "2019-11-30"), + ], ) def test_offset_multiplication( n_months, scaling_factor, start_timestamp, expected_timestamp From 9ec067697085471c1d4d398f135b8af444888777 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: 
Wed, 21 Jun 2023 16:14:52 +0000 Subject: [PATCH 9/9] Added mistakenly removed comment --- doc/source/whatsnew/v2.0.3.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.3.rst b/doc/source/whatsnew/v2.0.3.rst index 6eff51755e8ed..3e12af946e661 100644 --- a/doc/source/whatsnew/v2.0.3.rst +++ b/doc/source/whatsnew/v2.0.3.rst @@ -22,6 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in :func:`DataFrame.convert_dtype` and :func:`Series.convert_dtype` when trying to convert :class:`ArrowDtype` with ``dtype_backend="nullable_numpy"`` (:issue:`53648`) - Bug in :func:`RangeIndex.union` when using ``sort=True`` with another :class:`RangeIndex` (:issue:`53490`) - Bug in :func:`Series.reindex` when expanding a non-nanosecond datetime or timedelta :class:`Series` would not fill with ``NaT`` correctly (:issue:`53497`) - Bug in :func:`read_csv` when defining ``dtype`` with ``bool[pyarrow]`` for the ``"c"`` and ``"python"`` engines (:issue:`53390`)