From 539826231868e20f88f1fbfe93fc5f9c5b363974 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Mar 2022 16:48:16 -0800 Subject: [PATCH 1/3] REF: use standard patterns in tslibs --- pandas/_libs/tslibs/conversion.pxd | 4 +- pandas/_libs/tslibs/conversion.pyx | 40 +++++++--------- pandas/_libs/tslibs/tzconversion.pyx | 72 ++++++++++++++-------------- pandas/_libs/tslibs/vectorized.pyx | 45 ++++++++--------- 4 files changed, 76 insertions(+), 85 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 5b80193c1f27a..227cf454700d5 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -12,10 +12,10 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct cdef class _TSObject: - cdef: + cdef readonly: npy_datetimestruct dts # npy_datetimestruct int64_t value # numpy dt64 - object tzinfo + tzinfo tzinfo bint fold diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index e30a91ae3e10a..7ea3c85fe9ddb 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -370,18 +370,13 @@ cdef class _TSObject: # cdef: # npy_datetimestruct dts # npy_datetimestruct # int64_t value # numpy dt64 - # object tzinfo + # tzinfo tzinfo # bint fold def __cinit__(self): # GH 25057. As per PEP 495, set fold to 0 by default self.fold = 0 - @property - def value(self): - # This is needed in order for `value` to be accessible in lib.pyx - return self.value - cdef convert_to_tsobject(object ts, tzinfo tz, str unit, bint dayfirst, bint yearfirst, int32_t nanos=0): @@ -541,7 +536,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, int64_t value # numpy dt64 datetime dt ndarray[int64_t] trans - int64_t[:] deltas + int64_t[::1] deltas value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -711,7 +706,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz): """ cdef: ndarray[int64_t] trans - int64_t[:] deltas + int64_t[::1] deltas int64_t local_val Py_ssize_t pos str typ @@ -729,26 +724,27 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz): # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) - if is_fixed_offset(tz): - # static/fixed tzinfo; in this case we know len(deltas) == 1 - # This can come back with `typ` of either "fixed" or None - dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) - elif typ == 'pytz': + if typ == "pytz": # i.e. treat_tz_as_pytz(tz) - pos = trans.searchsorted(obj.value, side='right') - 1 + pos = trans.searchsorted(obj.value, side="right") - 1 + local_val = obj.value + deltas[pos] + + # find right representation of dst etc in pytz timezone tz = tz._tzinfos[tz._transition_info[pos]] - dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - elif typ == 'dateutil': + elif typ == "dateutil": # i.e. treat_tz_as_dateutil(tz) - pos = trans.searchsorted(obj.value, side='right') - 1 - dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + pos = trans.searchsorted(obj.value, side="right") - 1 + local_val = obj.value + deltas[pos] + # dateutil supports fold, so we infer fold from value obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) else: - # Note: as of 2018-07-17 all tzinfo objects that are _not_ - # either pytz or dateutil have is_fixed_offset(tz) == True, - # so this branch will never be reached. - pass + # All other cases have len(deltas) == 1. As of 2018-07-17 + # (and 2022-03-07), all test cases that get here have + # is_fixed_offset(tz). + local_val = obj.value + deltas[0] + + dt64_to_dtstruct(local_val, &obj.dts) obj.tzinfo = tz diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 4dbfabad5dc84..8ef52eeab706e 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -52,7 +52,7 @@ cdef int64_t tz_localize_to_utc_single( """See tz_localize_to_utc.__doc__""" cdef: int64_t delta - int64_t[:] deltas + int64_t[::1] deltas if val == NPY_NAT: return val @@ -115,9 +115,10 @@ timedelta-like} localized : ndarray[int64_t] """ cdef: - int64_t[:] deltas, idx_shifted, idx_shifted_left, idx_shifted_right + int64_t[::1] deltas + int64_t[:] idx_shifted, idx_shifted_left, idx_shifted_right ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq - Py_ssize_t i, idx, pos, ntrans, n = len(vals) + Py_ssize_t i, idx, pos, ntrans, n = vals.shape[0] Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t *tdata int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins @@ -184,7 +185,7 @@ timedelta-like} trans, deltas, _ = get_dst_info(tz) tdata = cnp.PyArray_DATA(trans) - ntrans = len(trans) + ntrans = trans.shape[0] # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) @@ -400,7 +401,7 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz): """ cdef: int64_t delta - int64_t[:] deltas + int64_t[::1] deltas ndarray[int64_t, ndim=1] trans intp_t pos @@ -437,11 +438,11 @@ def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): cdef: const int64_t[:] converted - if len(vals) == 0: + if vals.shape[0] == 0: return np.array([], dtype=np.int64) converted = _tz_convert_from_utc(vals, tz) - return np.array(converted, dtype=np.int64) + return np.asarray(converted, dtype=np.int64) @cython.boundscheck(False) @@ -460,53 +461,48 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): converted : ndarray[int64_t] """ cdef: - int64_t[:] converted, deltas - Py_ssize_t i, n = len(vals) + int64_t[::1] converted, deltas + Py_ssize_t i, n = vals.shape[0]#len(vals) int64_t val, delta intp_t[:] pos ndarray[int64_t] trans str typ + bint use_tzlocal = False, use_fixed = False, use_utc = True if is_utc(tz): - return vals + # Much faster than going through the "standard" pattern below + return vals.copy() + + if is_utc(tz) or tz is None: + use_utc = True elif is_tzlocal(tz): - converted = np.empty(n, dtype=np.int64) - for i in range(n): - val = vals[i] - if val == NPY_NAT: - converted[i] = NPY_NAT - else: - converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False) + use_tzlocal = True else: - converted = np.empty(n, dtype=np.int64) - trans, deltas, typ = get_dst_info(tz) if typ not in ["pytz", "dateutil"]: # FixedOffset, we know len(deltas) == 1 delta = deltas[0] - - for i in range(n): - val = vals[i] - if val == NPY_NAT: - converted[i] = val - else: - converted[i] = val + delta - + use_fixed = True else: pos = trans.searchsorted(vals, side="right") - 1 - for i in range(n): - val = vals[i] - if val == NPY_NAT: - converted[i] = val - else: - if pos[i] < 0: - # TODO: How is this reached? Should we be checking for - # it elsewhere? - raise ValueError("First time before start of DST info") + converted = np.empty(n, dtype=np.int64) - converted[i] = val + deltas[pos[i]] + for i in range(n): + val = vals[i] + if val == NPY_NAT: + converted[i] = NPY_NAT + continue + + # The pattern used in vectorized.pyx checks for use_utc here, + # but we handle that case above. + if use_tzlocal: + converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False) + elif use_fixed: + converted[i] = val + delta + else: + converted[i] = val + deltas[pos[i]] return converted @@ -547,8 +543,10 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True, timedelta td dt64_to_dtstruct(val, &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us) + # tz.utcoffset only makes sense if datetime # is _wall time_, so if val is a UTC timestamp convert to wall time if not to_utc: diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 17720de33ab33..bc254b6c5a5cf 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -121,7 +121,7 @@ def ints_to_pydatetime( cdef: Py_ssize_t i, n = len(stamps) ndarray[int64_t] trans - int64_t[:] deltas + int64_t[::1] deltas intp_t[:] pos npy_datetimestruct dts object dt, new_tz @@ -167,26 +167,23 @@ def ints_to_pydatetime( if value == NPY_NAT: result[i] = NaT + continue + + if use_utc: + local_val = value + elif use_tzlocal: + local_val = tz_convert_utc_to_tzlocal(value, tz) + elif use_fixed: + local_val = value + delta else: - if use_utc: - local_val = value - elif use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(value, tz) - elif use_fixed: - local_val = value + delta - elif not use_pytz: - # i.e. dateutil - # no zone-name change for dateutil tzs - dst etc - # represented in single object. - local_val = value + deltas[pos[i]] - else: - # pytz - # find right representation of dst etc in pytz timezone - new_tz = tz._tzinfos[tz._transition_info[pos[i]]] - local_val = value + deltas[pos[i]] - - dt64_to_dtstruct(local_val, &dts) - result[i] = func_create(value, dts, new_tz, freq, fold) + local_val = value + deltas[pos[i]] + + if use_pytz: + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos[i]]] + + dt64_to_dtstruct(local_val, &dts) + result[i] = func_create(value, dts, new_tz, freq, fold) return result @@ -226,7 +223,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: npy_datetimestruct dts int reso = RESO_DAY, curr_reso ndarray[int64_t] trans - int64_t[:] deltas + int64_t[::1] deltas intp_t[:] pos int64_t local_val, delta = NPY_NAT bint use_utc = False, use_tzlocal = False, use_fixed = False @@ -288,7 +285,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t Py_ssize_t i, n = len(stamps) int64_t[:] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans - int64_t[:] deltas + int64_t[::1] deltas str typ Py_ssize_t[:] pos int64_t local_val, delta = NPY_NAT @@ -346,7 +343,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: cdef: Py_ssize_t i, n = len(stamps) ndarray[int64_t] trans - int64_t[:] deltas + int64_t[::1] deltas intp_t[:] pos int64_t local_val, delta = NPY_NAT str typ @@ -392,7 +389,7 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz): Py_ssize_t i, n = len(stamps) int64_t[:] result = np.empty(n, dtype=np.int64) ndarray[int64_t] trans - int64_t[:] deltas + int64_t[::1] deltas Py_ssize_t[:] pos npy_datetimestruct dts int64_t local_val, delta = NPY_NAT From ac2f964368c8d0c1a71add880e54bf84afa8d6d7 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Mar 2022 18:16:31 -0800 Subject: [PATCH 2/3] lint fixup --- pandas/_libs/tslibs/tzconversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 8ef52eeab706e..1af095ff2ce84 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -462,7 +462,7 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): """ cdef: int64_t[::1] converted, deltas - Py_ssize_t i, n = vals.shape[0]#len(vals) + Py_ssize_t i, n = vals.shape[0] int64_t val, delta intp_t[:] pos ndarray[int64_t] trans From bf1a19c6e53b60fd42bd94b98165a076922dcc11 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Mar 2022 19:42:30 -0800 Subject: [PATCH 3/3] avoid not-initialized-warning --- pandas/_libs/tslibs/tzconversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 1af095ff2ce84..2173ca37da7fa 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -463,7 +463,7 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): cdef: int64_t[::1] converted, deltas Py_ssize_t i, n = vals.shape[0] - int64_t val, delta + int64_t val, delta = 0 # avoid not-initialized-warning intp_t[:] pos ndarray[int64_t] trans str typ