diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index a34161b20e2ff..600ac54639dfc 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -6,9 +6,6 @@ from numpy cimport ( ) -cdef int64_t localize_tzinfo_api( - int64_t utc_val, tzinfo tz, bint* fold=* -) except? -1 cdef int64_t tz_convert_from_utc_single( int64_t utc_val, tzinfo tz, bint* fold=?, Py_ssize_t* outpos=? ) except? -1 @@ -16,15 +13,6 @@ cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* ) except? -1 -cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n) - -cdef bint infer_dateutil_fold( - int64_t value, - const int64_t[::1] trans, - const int64_t[::1] deltas, - Py_ssize_t pos, -) - cdef class Localizer: cdef: diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 8d307e324ba4e..fede9768f5fee 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -50,7 +50,6 @@ cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64) @cython.freelist(16) -#@cython.internal @cython.final cdef class Localizer: # cdef: @@ -102,13 +101,15 @@ cdef class Localizer: if self.use_utc: return utc_val elif self.use_tzlocal: - return utc_val + localize_tzinfo_api(utc_val, self.tz, fold) + return utc_val + _tz_localize_using_tzinfo_api( + utc_val, self.tz, to_utc=False, fold=fold + ) elif self.use_fixed: return utc_val + self.delta else: pos[0] = bisect_right_i8(self.tdata, utc_val, self.ntrans) - 1 if fold is not NULL: - fold[0] = infer_dateutil_fold( + fold[0] = _infer_dateutil_fold( utc_val, self.trans, self.deltas, pos[0] ) @@ -184,10 +185,10 @@ timedelta-like} cdef: const int64_t[::1] deltas ndarray[uint8_t, cast=True] ambiguous_array - Py_ssize_t i, isl, isr, idx, pos, ntrans, n = vals.shape[0] + Py_ssize_t i, idx, pos, ntrans, n = vals.shape[0] Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t *tdata - int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins + int64_t v, left, right, val, new_local, remaining_mins int64_t first_delta, delta int64_t shift_delta = 0 ndarray[int64_t] trans, result_a, result_b, dst_hours @@ -202,7 +203,7 @@ timedelta-like} if is_utc(tz) or tz is None: return vals.copy() - result = np.empty(n, dtype=np.int64) + result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) if is_tzlocal(tz) or is_zoneinfo(tz): for i in range(n): @@ -265,40 +266,7 @@ timedelta-like} # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) - result_a = np.empty(n, dtype=np.int64) - result_b = np.empty(n, dtype=np.int64) - - for i in range(n): - # This loops resembles the "Find the two best possibilities" block - # in pytz's DstTZInfo.localize method. - result_a[i] = NPY_NAT - result_b[i] = NPY_NAT - - val = vals[i] - if val == NPY_NAT: - continue - - # TODO: be careful of overflow in val-DAY_NANOS - isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1 - if isl < 0: - isl = 0 - - v_left = val - deltas[isl] - pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 - # timestamp falls to the left side of the DST transition - if v_left + deltas[pos_left] == val: - result_a[i] = v_left - - # TODO: be careful of overflow in val+DAY_NANOS - isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1 - if isr < 0: - isr = 0 - - v_right = val - deltas[isr] - pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 - # timestamp falls to the right side of the DST transition - if v_right + deltas[pos_right] == val: - result_b[i] = v_right + result_a, result_b =_get_utc_bounds(vals, tdata, ntrans, deltas) # silence false-positive compiler warning dst_hours = np.empty(0, dtype=np.int64) @@ -417,6 +385,59 @@ cdef inline str _render_tstamp(int64_t val): return str(Timestamp(val)) +cdef _get_utc_bounds( + ndarray vals, + int64_t* tdata, + Py_ssize_t ntrans, + const int64_t[::1] deltas, +): + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) + + cdef: + ndarray result_a, result_b + Py_ssize_t i, n = vals.size + int64_t val, v_left, v_right + Py_ssize_t isl, isr, pos_left, pos_right + + result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) + result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) + + for i in range(n): + # This loops resembles the "Find the two best possibilities" block + # in pytz's DstTZInfo.localize method. + result_a[i] = NPY_NAT + result_b[i] = NPY_NAT + + val = vals[i] + if val == NPY_NAT: + continue + + # TODO: be careful of overflow in val-DAY_NANOS + isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1 + if isl < 0: + isl = 0 + + v_left = val - deltas[isl] + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == val: + result_a[i] = v_left + + # TODO: be careful of overflow in val+DAY_NANOS + isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1 + if isr < 0: + isr = 0 + + v_right = val - deltas[isr] + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 + # timestamp falls to the right side of the DST transition + if v_right + deltas[pos_right] == val: + result_b[i] = v_right + + return result_a, result_b + + @cython.boundscheck(False) cdef ndarray[int64_t] _get_dst_hours( # vals only needed here to potential render an exception message @@ -433,10 +454,10 @@ cdef ndarray[int64_t] _get_dst_hours( intp_t switch_idx int64_t left, right - dst_hours = np.empty(n, dtype=np.int64) + dst_hours = cnp.PyArray_EMPTY(result_a.ndim, result_a.shape, cnp.NPY_INT64, 0) dst_hours[:] = NPY_NAT - mismatch = np.zeros(n, dtype=bool) + mismatch = cnp.PyArray_ZEROS(result_a.ndim, result_a.shape, cnp.NPY_BOOL, 0) for i in range(n): left = result_a[i] @@ -450,6 +471,7 @@ cdef ndarray[int64_t] _get_dst_hours( trans_idx = mismatch.nonzero()[0] if trans_idx.size == 1: + # TODO: not reached in tests 2022-05-02; possible? stamp = _render_tstamp(vals[trans_idx[0]]) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp} as there " @@ -471,6 +493,7 @@ cdef ndarray[int64_t] _get_dst_hours( delta = np.diff(result_a[grp]) if grp.size == 1 or np.all(delta > 0): + # TODO: not reached in tests 2022-05-02; possible? stamp = _render_tstamp(vals[grp[0]]) raise pytz.AmbiguousTimeError(stamp) @@ -478,6 +501,7 @@ cdef ndarray[int64_t] _get_dst_hours( # for standard switch_idxs = (delta <= 0).nonzero()[0] if switch_idxs.size > 1: + # TODO: not reached in tests 2022-05-02; possible? raise pytz.AmbiguousTimeError( f"There are {switch_idxs.size} dst switches when " "there should only be 1." @@ -495,15 +519,6 @@ cdef ndarray[int64_t] _get_dst_hours( # ---------------------------------------------------------------------- # Timezone Conversion -cdef int64_t localize_tzinfo_api( - int64_t utc_val, tzinfo tz, bint* fold=NULL -) except? -1: - """ - See _tz_localize_using_tzinfo_api.__doc__ - """ - return _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False, fold=fold) - - def py_tz_convert_from_utc_single(int64_t utc_val, tzinfo tz): # The 'bint* fold=NULL' in tz_convert_from_utc_single means we cannot # make it cdef, so this is version exposed for testing from python. @@ -608,7 +623,7 @@ cdef int64_t _tz_localize_using_tzinfo_api( # NB: relies on dateutil internals, subject to change. @cython.boundscheck(False) @cython.wraparound(False) -cdef bint infer_dateutil_fold( +cdef bint _infer_dateutil_fold( int64_t value, const int64_t[::1] trans, const int64_t[::1] deltas,