From 60cf071bebceb17cf7c59ca80cbaf1719e367814 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 17:34:01 -0800 Subject: [PATCH 1/6] Handle time-part of datetime in int64 nanoseconds rather than float seconds --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/tslibs/period.pyx | 104 ++++++++-------------- pandas/tests/scalar/period/test_asfreq.py | 21 ++++- 3 files changed, 58 insertions(+), 68 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e07a8fa0469f4..7db306d7ff82b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -105,6 +105,7 @@ Datetimelike - Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) +- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 3dd560ece188d..9ab2fd0b6521d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -473,9 +473,6 @@ cdef int DtoQ_yq(int64_t ordinal, asfreq_info *af_info, int *year) nogil: int quarter pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) - # TODO: Another version of this function used - # date_info_from_days_and_time(&dts, unix_date, 0) - # instead of pandas_datetime_to_datetimestruct; is one more performant? if af_info.to_end != 12: dts.month -= af_info.to_end if dts.month <= 0: @@ -816,24 +813,22 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil: # raise ValueError -cdef void get_date_info(int64_t ordinal, int freq, - npy_datetimestruct *dts) nogil: +cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogil: cdef: - int64_t unix_date - double abstime + int64_t unix_date, nanos + npy_datetimestruct dts2 unix_date = get_unix_date(ordinal, freq) - abstime = get_abs_time(freq, unix_date, ordinal) - - while abstime < 0: - abstime += 86400 - unix_date -= 1 + nanos = get_time_nanos(freq, unix_date, ordinal) - while abstime >= 86400: - abstime -= 86400 - unix_date += 1 + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts) - date_info_from_days_and_time(dts, unix_date, abstime) + dt64_to_dtstruct(nanos, &dts2) + dts.hour = dts2.hour + dts.min = dts2.min + dts.sec = dts2.sec + dts.us = dts2.us + dts.ps = dts2.ps cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: @@ -865,74 +860,49 @@ cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: @cython.cdivision -cdef void date_info_from_days_and_time(npy_datetimestruct *dts, - int64_t unix_date, - double abstime) nogil: +cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil: """ - Set the instance's value using the given date and time. + Find the number of nanoseconds after midnight on the given unix_date + that the ordinal represents in the given frequency. Parameters ---------- - dts : npy_datetimestruct* + freq : int unix_date : int64_t - days elapsed since datetime(1970, 1, 1) - abstime : double - seconds elapsed since beginning of day described by unix_date + ordinal : int64_t - Notes - ----- - Updates dts inplace + Returns + ------- + int64_t """ cdef: - int inttime - int hour, minute - double second, subsecond_fraction + int64_t sub, factor - # Bounds check - # The calling function is responsible for ensuring that - # abstime >= 0.0 and abstime <= 86400 + freq = get_freq_group(freq) - # Calculate the date - pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts) - - # Calculate the time - inttime = abstime - hour = inttime / 3600 - minute = (inttime % 3600) / 60 - second = abstime - (hour * 3600 + minute * 60) - - dts.hour = hour - dts.min = minute - dts.sec = second - - subsecond_fraction = second - dts.sec - dts.us = int((subsecond_fraction) * 1e6) - dts.ps = int(((subsecond_fraction) * 1e6 - dts.us) * 1e6) + if freq <= FR_DAY: + return 0 + if freq == FR_NS: + factor = 1 -@cython.cdivision -cdef double get_abs_time(int freq, int64_t unix_date, int64_t ordinal) nogil: - cdef: - int freq_index, day_index, base_index - int64_t per_day, start_ord - double unit, result + if freq == FR_US: + factor = 10**3 - if freq <= FR_DAY: - return 0 + if freq == FR_MS: + factor = 10**6 - freq_index = freq // 1000 - day_index = FR_DAY // 1000 - base_index = FR_SEC // 1000 + if freq == FR_SEC: + factor = 10 **9 - per_day = get_daytime_conversion_factor(day_index, freq_index) - unit = get_daytime_conversion_factor(freq_index, base_index) + if freq == FR_MIN: + factor = 10**9 * 60 - if base_index < freq_index: - unit = 1 / unit + if freq == FR_HR: + factor = 10**9 * 3600 - start_ord = unix_date * per_day - result = (unit * (ordinal - start_ord)) - return result + sub = ordinal - unix_date * 24 * 3600 * 10**9 / factor + return sub * factor cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 357274e724c68..2ee83a9ff1596 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -3,7 +3,7 @@ from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG, _period_code_map from pandas.errors import OutOfBoundsDatetime -from pandas import Period, offsets +from pandas import Period, Timestamp, offsets class TestFreqConversion: @@ -656,6 +656,25 @@ def test_conv_secondly(self): assert ival_S.asfreq("S") == ival_S + def test_conv_microsecond(self): + # GH#31475 Avoid floating point errors dropping the start_time to + # before the beginning of the Period + per = Period("2020-01-30 15:57:27.576166", freq="U") + assert per.ordinal == 1580399847576166 + + start = per.start_time + expected = Timestamp("2020-01-30 15:57:27.576166") + assert start == expected + assert start.value == per.ordinal * 1000 + + def test_conv_microsecond_out_of_bounds(self): + # GH#31475 + per2 = Period("2300-01-01", "us") + with pytest.raises(OutOfBoundsDatetime, match="2300-01-01"): + per2.start_time + with pytest.raises(OutOfBoundsDatetime, match="2300-01-01"): + per2.end_time + def test_asfreq_mult(self): # normal freq to mult freq p = Period(freq="A", year=2007) From b5d563bf41def6685d70da4d36f522f09702ac41 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 9 Feb 2020 16:49:02 -0800 Subject: [PATCH 2/6] remove workaround --- pandas/_libs/tslibs/period.pyx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 2373d39386020..abfdbf6e79943 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1142,11 +1142,7 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: if ordinal == NPY_NAT: return NPY_NAT - if freq == 11000: - # Microsecond, avoid get_date_info to prevent floating point errors - pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts) - else: - get_date_info(ordinal, freq, &dts) + get_date_info(ordinal, freq, &dts) check_dts_bounds(&dts) return dtstruct_to_dt64(&dts) From 50480c3546d042f2e51bc881151a60dfc7f7ff51 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 12 Apr 2020 13:30:12 -0700 Subject: [PATCH 3/6] if -> elif --- pandas/_libs/tslibs/period.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 977a3305541e2..e667dad874074 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -876,22 +876,22 @@ cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil: if freq <= FR_DAY: return 0 - if freq == FR_NS: + elif freq == FR_NS: factor = 1 - if freq == FR_US: + elif freq == FR_US: factor = 10**3 - if freq == FR_MS: + elif freq == FR_MS: factor = 10**6 - if freq == FR_SEC: + elif freq == FR_SEC: factor = 10 **9 - if freq == FR_MIN: + elif freq == FR_MIN: factor = 10**9 * 60 - if freq == FR_HR: + elif freq == FR_HR: factor = 10**9 * 3600 sub = ordinal - unix_date * 24 * 3600 * 10**9 / factor From b911eb28b4de0c5b2a0da1ab8ea96e2c12a23f57 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Apr 2020 19:56:24 -0700 Subject: [PATCH 4/6] dummy commit to force CI From a0d475ffba8b3f62fc5eba9aa3af1096543bcab4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 07:05:28 -0700 Subject: [PATCH 5/6] Dummy commit to force CI From e82a8c6d555e3505c3463dc60a78e3e29acf7654 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 07:39:37 -0700 Subject: [PATCH 6/6] Fix compiler warning --- pandas/_libs/tslibs/period.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e667dad874074..c4a7df0017619 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -891,7 +891,8 @@ cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil: elif freq == FR_MIN: factor = 10**9 * 60 - elif freq == FR_HR: + else: + # We must have freq == FR_HR factor = 10**9 * 3600 sub = ordinal - unix_date * 24 * 3600 * 10**9 / factor