From 5a62136680c2bf415a9639a4e51b220407ce4528 Mon Sep 17 00:00:00 2001 From: Diogo Miranda Date: Mon, 1 Apr 2024 22:09:59 +0100 Subject: [PATCH 1/5] BUG: Timestamp.replace now reflects changes onto Timestamp.unit (#57749) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/tslibs/timestamps.pyx | 26 +++++++++++++++---- .../scalar/timestamp/methods/test_replace.py | 10 +++++++ 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c0a2dc7e39f29..3c09213c0b42f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -346,6 +346,7 @@ Bug fixes - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Fixed bug in :meth:`Timestamp.replace` where it would not reflect changes into :meth:`Timestamp.unit`. (:issue:`57749`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d4cd90613ca5b..4c87132fbeeda 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2439,10 +2439,12 @@ default 'raise' datetime ts_input tzinfo_type tzobj _TSObject ts + NPY_DATETIMEUNIT rep_reso # set to naive if needed tzobj = self.tzinfo value = self._value + rep_reso = self._creso # GH 37610. Preserve fold when replacing. 
if fold is None: @@ -2466,40 +2468,54 @@ default 'raise' if year is not None: dts.year = validate("year", year) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_Y if month is not None: dts.month = validate("month", month) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_M if day is not None: dts.day = validate("day", day) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_D if hour is not None: dts.hour = validate("hour", hour) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_h if minute is not None: dts.min = validate("minute", minute) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_m if second is not None: dts.sec = validate("second", second) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_s if microsecond is not None: dts.us = validate("microsecond", microsecond) + if microsecond > 999: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_us + else: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_ms if nanosecond is not None: dts.ps = validate("nanosecond", nanosecond) * 1000 + rep_reso = NPY_DATETIMEUNIT.NPY_FR_ns if tzinfo is not object: tzobj = tzinfo + if rep_reso < self._creso: + rep_reso = self._creso + # reconstruct & check bounds if tzobj is None: # We can avoid going through pydatetime paths, which is robust # to datetimes outside of pydatetime range. 
ts = _TSObject() try: - ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) + ts.value = npy_datetimestruct_to_datetime(rep_reso, &dts) except OverflowError as err: fmt = dts_to_iso_string(&dts) raise OutOfBoundsDatetime( f"Out of bounds timestamp: {fmt} with frequency '{self.unit}'" ) from err ts.dts = dts - ts.creso = self._creso + ts.creso = rep_reso ts.fold = fold return create_timestamp_from_ts( - ts.value, dts, tzobj, fold, reso=self._creso + ts.value, dts, tzobj, fold, reso=rep_reso ) elif tzobj is not None and treat_tz_as_pytz(tzobj): @@ -2518,10 +2534,10 @@ default 'raise' ts_input = datetime(**kwargs) ts = convert_datetime_to_tsobject( - ts_input, tzobj, nanos=dts.ps // 1000, reso=self._creso + ts_input, tzobj, nanos=dts.ps // 1000, reso=rep_reso ) return create_timestamp_from_ts( - ts.value, dts, tzobj, fold, reso=self._creso + ts.value, dts, tzobj, fold, reso=rep_reso ) def to_julian_date(self) -> np.float64: diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index d67de79a8dd10..70ab1a7580bda 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -189,3 +189,13 @@ def test_replace_preserves_fold(self, fold): ts_replaced = ts.replace(second=1) assert ts_replaced.fold == fold + + def test_replace_unit(self): + # GH#57749 + ts = Timestamp("2023-07-15 23:08:12") + ts1 = Timestamp("2023-07-15 23:08:12.134567") + ts2 = Timestamp("2023-07-15 23:08:12.134567123") + ts = ts.replace(microsecond=ts1.microsecond) + assert ts == ts1 + ts = ts.replace(nanosecond=ts2.nanosecond) + assert ts == ts2 From 0d6efa093ae0e5c5cbfa8bf5374ac17b1358ebeb Mon Sep 17 00:00:00 2001 From: Diogo Miranda Date: Mon, 8 Apr 2024 18:07:42 +0100 Subject: [PATCH 2/5] TST: test_replace.py now explicitly checks Timestamp.unit --- pandas/tests/scalar/timestamp/methods/test_replace.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index 70ab1a7580bda..62f9ecc9ccf2c 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -195,7 +195,11 @@ def test_replace_unit(self): ts = Timestamp("2023-07-15 23:08:12") ts1 = Timestamp("2023-07-15 23:08:12.134567") ts2 = Timestamp("2023-07-15 23:08:12.134567123") + ts = ts.replace(microsecond=999) + assert ts.unit == "ms" ts = ts.replace(microsecond=ts1.microsecond) + assert ts.unit == "us" assert ts == ts1 ts = ts.replace(nanosecond=ts2.nanosecond) + assert ts.unit == "ns" assert ts == ts2 From 13fdc2a3f16b093a1c683e9b68d11a239b0b2ca5 Mon Sep 17 00:00:00 2001 From: Diogo Miranda Date: Wed, 19 Jun 2024 15:20:32 +0100 Subject: [PATCH 3/5] DOC: Change whatsnew3.0.0 to only have this bugfix --- doc/source/whatsnew/v3.0.0.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 43382d72e6476..bdde8de83d98d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -361,17 +361,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) -- Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) -- Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) -- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) -- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) -- Fixed bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. 
(:issue:`57069`) -- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) -- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) -- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) -- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) -- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) - Fixed bug in :meth:`Timestamp.replace` where it would not reflect changes into :meth:`Timestamp.unit`. (:issue:`57749`) Categorical From f2ca887d15db03fe19bfc98deca3b3ddf62bd899 Mon Sep 17 00:00:00 2001 From: Diogo Miranda Date: Sat, 22 Jun 2024 15:25:28 +0100 Subject: [PATCH 4/5] BUG: Fix bug where Timestamp.replace didn't recalculate precision when a field was replaced with 0 --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/tslibs/timestamps.pyx | 23 ++++++++++++++++++- .../scalar/timestamp/methods/test_replace.py | 16 +++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 626727a64fea7..1b7d666859491 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -473,7 +473,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Fixed bug in :meth:`Timestamp.replace` where it would not reflect changes into :meth:`Timestamp.unit`. 
(:issue:`57749`) Categorical ^^^^^^^^^^^ @@ -490,6 +489,7 @@ Datetimelike - Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) +- Bug in :meth:`Timestamp.replace` where it would not reflect changes into :meth:`Timestamp.unit`. (:issue:`57749`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9130c80c66e81..7dad2fd5e4f4b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2633,6 +2633,8 @@ default 'raise' pandas_datetime_to_datetimestruct(value, self._creso, &dts) dts.ps = self.nanosecond * 1000 + zero_set = False + # replace def validate(k, v): """ validate integers """ @@ -2666,13 +2668,32 @@ default 'raise' rep_reso = NPY_DATETIMEUNIT.NPY_FR_us else: rep_reso = NPY_DATETIMEUNIT.NPY_FR_ms + if microsecond == 0: + zero_set = True if nanosecond is not None: dts.ps = validate("nanosecond", nanosecond) * 1000 rep_reso = NPY_DATETIMEUNIT.NPY_FR_ns + if nanosecond == 0: + zero_set = True if tzinfo is not object: tzobj = tzinfo - if rep_reso < self._creso: + # Recalculate the replacement resolution if a unit was replaced with 0 + if zero_set: + if dts.ps != 0: + if dts.ps % 1000 != 0: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_ps + else: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_ns + elif dts.us != 0: + if dts.us % 1000 != 0: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_us + else: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_ms + else: + 
rep_reso = NPY_DATETIMEUNIT.NPY_FR_s + + if rep_reso < self._creso and not zero_set: rep_reso = self._creso # reconstruct & check bounds diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index 5d511947ffdbf..26daf22be95d8 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -206,3 +206,19 @@ def test_replace_unit(self): ts = ts.replace(nanosecond=ts2.nanosecond) assert ts.unit == "ns" assert ts == ts2 + + def test_replace_resets_to_more_precise_s(self): + # GH#57749 + ts = Timestamp(year=2023, month=1, day=1, nanosecond=5) + result = ts.replace(nanosecond=0) + assert result.unit == "s" + + def test_replace_resets_to_more_precise_ms(self): + ts = Timestamp(year=2020, month=1, day=1, microsecond=5, nanosecond=5) + result = ts.replace(nanosecond=0) + assert result.unit == "us" + + def test_replace_resets_to_more_precise_us(self): + ts = Timestamp(year=2020, month=1, day=1, microsecond=2000, nanosecond=5) + result = ts.replace(nanosecond=0) + assert result.unit == "ms" From 636e018cd0b76e1cc57871e66c95553bca52c3f0 Mon Sep 17 00:00:00 2001 From: Diogo Miranda Date: Thu, 27 Jun 2024 01:23:53 +0100 Subject: [PATCH 5/5] DOC: Change whatsnew 3.0.0 to be in alphabetical order --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7a3dc3240ca6d..1efb6b414a8b8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -498,8 +498,8 @@ Datetimelike - Bug in :meth:`Dataframe.agg` with df with missing values resulting in IndexError (:issue:`58810`) - Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` does not raise on Custom business days frequencies bigger then "1C" (:issue:`58664`) - Bug in :meth:`DatetimeIndex.is_year_start` and 
:meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`) -- Bug in :meth:`Timestamp.replace` where it would not reflect changes into :meth:`Timestamp.unit`. (:issue:`57749`) - Bug in :meth:`DatetimeIndex.union` when ``unit`` was non-nanosecond (:issue:`59036`) +- Bug in :meth:`Timestamp.replace` where it would not reflect changes into :meth:`Timestamp.unit`. (:issue:`57749`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta