From 1b80475f9cecd7842f78100aff63ec656014be4c Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Mar 2022 16:05:38 -0700 Subject: [PATCH] REF: _infer_tsobject_fold to infer_datetuil_fold --- pandas/_libs/tslibs/conversion.pxd | 2 - pandas/_libs/tslibs/conversion.pyx | 69 ++-------------------------- pandas/_libs/tslibs/timestamps.pxd | 2 + pandas/_libs/tslibs/timestamps.pyx | 21 ++++++++- pandas/_libs/tslibs/tzconversion.pxd | 12 ++++- pandas/_libs/tslibs/tzconversion.pyx | 45 ++++++++++++++++++ pandas/_libs/tslibs/vectorized.pyx | 7 +-- 7 files changed, 85 insertions(+), 73 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 227cf454700d5..206e0171e0a55 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -31,5 +31,3 @@ cdef int64_t get_datetime64_nanos(object val) except? -1 cpdef datetime localize_pydatetime(datetime dt, tzinfo tz) cdef int64_t cast_from_unit(object ts, str unit) except? -1 cpdef (int64_t, int) precision_from_unit(str unit) - -cdef int64_t normalize_i8_stamp(int64_t local_val) nogil diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 132d742b78e9c..f51f25c2065f2 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -72,6 +72,7 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.tzconversion cimport ( bisect_right_i8, + infer_datetuil_fold, localize_tzinfo_api, tz_localize_to_utc_single, ) @@ -530,7 +531,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, if typ == 'dateutil': tdata = cnp.PyArray_DATA(trans) pos = bisect_right_i8(tdata, obj.value, trans.shape[0]) - 1 - obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + obj.fold = infer_datetuil_fold(obj.value, trans, deltas, pos) # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, @@ -714,7 +715,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz): local_val = obj.value + deltas[pos] # dateutil supports fold, so we infer fold from value - obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + obj.fold = infer_datetuil_fold(obj.value, trans, deltas, pos) else: # All other cases have len(deltas) == 1. As of 2018-07-17 # (and 2022-03-07), all test cases that get here have @@ -726,49 +727,6 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz): obj.tzinfo = tz -cdef inline bint _infer_tsobject_fold( - _TSObject obj, - const int64_t[:] trans, - const int64_t[:] deltas, - intp_t pos, -): - """ - Infer _TSObject fold property from value by assuming 0 and then setting - to 1 if necessary. - - Parameters - ---------- - obj : _TSObject - trans : ndarray[int64_t] - ndarray of offset transition points in nanoseconds since epoch. - deltas : int64_t[:] - array of offsets corresponding to transition points in trans. - pos : intp_t - Position of the last transition point before taking fold into account. - - Returns - ------- - bint - Due to daylight saving time, one wall clock time can occur twice - when shifting from summer to winter time; fold describes whether the - datetime-like corresponds to the first (0) or the second time (1) - the wall clock hits the ambiguous time - - References - ---------- - .. [1] "PEP 495 - Local Time Disambiguation" - https://www.python.org/dev/peps/pep-0495/#the-fold-attribute - """ - cdef: - bint fold = 0 - - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - fold = 1 - - return fold - cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. @@ -802,24 +760,3 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz): elif isinstance(dt, ABCTimestamp): return dt.tz_localize(tz) return _localize_pydatetime(dt, tz) - - -# ---------------------------------------------------------------------- -# Normalization - -@cython.cdivision(False) -cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil: - """ - Round the localized nanosecond timestamp down to the previous midnight. - - Parameters - ---------- - local_val : int64_t - - Returns - ------- - int64_t - """ - cdef: - int64_t day_nanos = 24 * 3600 * 1_000_000_000 - return local_val - (local_val % day_nanos) diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 8833a611b0722..9b05fbc5be915 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -28,3 +28,5 @@ cdef class _Timestamp(ABCTimestamp): int op) except -1 cpdef void _set_freq(self, freq) cdef _warn_on_field_deprecation(_Timestamp self, freq, str field) + +cdef int64_t normalize_i8_stamp(int64_t local_val) nogil diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2afceb827e49a..a0958e11e28b3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -51,7 +51,6 @@ from pandas._libs.tslibs.conversion cimport ( _TSObject, convert_datetime_to_tsobject, convert_to_tsobject, - normalize_i8_stamp, ) from pandas._libs.tslibs.util cimport ( is_array, @@ -2116,3 +2115,23 @@ cdef int64_t _NS_LOWER_BOUND = NPY_NAT + 1 Timestamp.min = Timestamp(_NS_LOWER_BOUND) Timestamp.max = Timestamp(_NS_UPPER_BOUND) Timestamp.resolution = Timedelta(nanoseconds=1) # GH#21336, GH#21365 + + +# ---------------------------------------------------------------------- +# Scalar analogues to functions in vectorized.pyx + + +@cython.cdivision(False) +cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil: + """ + Round the localized nanosecond timestamp down to the previous midnight. + + Parameters + ---------- + local_val : int64_t + + Returns + ------- + int64_t + """ + return local_val - (local_val % ccalendar.DAY_NANOS) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 136e62985995e..74aab9f297379 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -1,5 +1,8 @@ from cpython.datetime cimport tzinfo -from numpy cimport int64_t +from numpy cimport ( + int64_t, + intp_t, +) cdef int64_t localize_tzinfo_api( @@ -11,3 +14,10 @@ cdef int64_t tz_localize_to_utc_single( ) except? -1 cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n) + +cdef bint infer_datetuil_fold( + int64_t value, + const int64_t[::1] trans, + const int64_t[::1] deltas, + intp_t pos, +) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 9190585b2882d..a63a27b8194de 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -632,3 +632,48 @@ cdef int64_t _tz_localize_using_tzinfo_api( td = tz.utcoffset(dt) delta = int(td.total_seconds() * 1_000_000_000) return delta + + +# NB: relies on dateutil internals, subject to change. +cdef bint infer_datetuil_fold( + int64_t value, + const int64_t[::1] trans, + const int64_t[::1] deltas, + intp_t pos, +): + """ + Infer _TSObject fold property from value by assuming 0 and then setting + to 1 if necessary. + + Parameters + ---------- + value : int64_t + trans : ndarray[int64_t] + ndarray of offset transition points in nanoseconds since epoch. + deltas : int64_t[:] + array of offsets corresponding to transition points in trans. + pos : intp_t + Position of the last transition point before taking fold into account. + + Returns + ------- + bint + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + References + ---------- + .. [1] "PEP 495 - Local Time Disambiguation" + https://www.python.org/dev/peps/pep-0495/#the-fold-attribute + """ + cdef: + bint fold = 0 + + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if value - fold_delta < trans[pos]: + fold = 1 + + return fold diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 07121396df4a2..a37e348154e22 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -18,8 +18,6 @@ from numpy cimport ( cnp.import_array() -from .conversion cimport normalize_i8_stamp - from .dtypes import Resolution from .ccalendar cimport DAY_NANOS @@ -34,7 +32,10 @@ from .np_datetime cimport ( ) from .offsets cimport BaseOffset from .period cimport get_period_ordinal -from .timestamps cimport create_timestamp_from_ts +from .timestamps cimport ( + create_timestamp_from_ts, + normalize_i8_stamp, +) from .timezones cimport ( get_dst_info, is_tzlocal,