CLN: tzconversion (#46926)

jbrockmendel · web-flow · commit 60ac973e64ba · 2022-05-02T16:10:42.000-04:00
diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd
@@ -6,25 +6,13 @@ from numpy cimport (
 )
 
 
-cdef int64_t localize_tzinfo_api(
-    int64_t utc_val, tzinfo tz, bint* fold=*
-) except? -1
 cdef int64_t tz_convert_from_utc_single(
     int64_t utc_val, tzinfo tz, bint* fold=?, Py_ssize_t* outpos=?
 ) except? -1
 cdef int64_t tz_localize_to_utc_single(
     int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*
 ) except? -1
 
-cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n)
-
-cdef bint infer_dateutil_fold(
-    int64_t value,
-    const int64_t[::1] trans,
-    const int64_t[::1] deltas,
-    Py_ssize_t pos,
-)
-
 
 cdef class Localizer:
     cdef:
diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx
@@ -50,7 +50,6 @@ cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64)
 
 
 @cython.freelist(16)
-#@cython.internal
 @cython.final
 cdef class Localizer:
     # cdef:
@@ -102,13 +101,15 @@ cdef class Localizer:
         if self.use_utc:
             return utc_val
         elif self.use_tzlocal:
-            return utc_val + localize_tzinfo_api(utc_val, self.tz, fold)
+            return utc_val + _tz_localize_using_tzinfo_api(
+                utc_val, self.tz, to_utc=False, fold=fold
+            )
         elif self.use_fixed:
             return utc_val + self.delta
         else:
             pos[0] = bisect_right_i8(self.tdata, utc_val, self.ntrans) - 1
             if fold is not NULL:
-                fold[0] = infer_dateutil_fold(
+                fold[0] = _infer_dateutil_fold(
                     utc_val, self.trans, self.deltas, pos[0]
                 )
 
@@ -184,10 +185,10 @@ timedelta-like}
     cdef:
         const int64_t[::1] deltas
         ndarray[uint8_t, cast=True] ambiguous_array
-        Py_ssize_t i, isl, isr, idx, pos, ntrans, n = vals.shape[0]
+        Py_ssize_t i, idx, pos, ntrans, n = vals.shape[0]
         Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
         int64_t *tdata
-        int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
+        int64_t v, left, right, val, new_local, remaining_mins
         int64_t first_delta, delta
         int64_t shift_delta = 0
         ndarray[int64_t] trans, result_a, result_b, dst_hours
@@ -202,7 +203,7 @@ timedelta-like}
     if is_utc(tz) or tz is None:
         return vals.copy()
 
-    result = np.empty(n, dtype=np.int64)
+    result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
 
     if is_tzlocal(tz) or is_zoneinfo(tz):
         for i in range(n):
@@ -265,40 +266,7 @@ timedelta-like}
 
     # Determine whether each date lies left of the DST transition (store in
     # result_a) or right of the DST transition (store in result_b)
-    result_a = np.empty(n, dtype=np.int64)
-    result_b = np.empty(n, dtype=np.int64)
-
-    for i in range(n):
-        # This loops resembles the "Find the two best possibilities" block
-        #  in pytz's DstTZInfo.localize method.
-        result_a[i] = NPY_NAT
-        result_b[i] = NPY_NAT
-
-        val = vals[i]
-        if val == NPY_NAT:
-            continue
-
-        # TODO: be careful of overflow in val-DAY_NANOS
-        isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1
-        if isl < 0:
-            isl = 0
-
-        v_left = val - deltas[isl]
-        pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
-        # timestamp falls to the left side of the DST transition
-        if v_left + deltas[pos_left] == val:
-            result_a[i] = v_left
-
-        # TODO: be careful of overflow in val+DAY_NANOS
-        isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1
-        if isr < 0:
-            isr = 0
-
-        v_right = val - deltas[isr]
-        pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
-        # timestamp falls to the right side of the DST transition
-        if v_right + deltas[pos_right] == val:
-            result_b[i] = v_right
+    result_a, result_b =_get_utc_bounds(vals, tdata, ntrans, deltas)
 
     # silence false-positive compiler warning
     dst_hours = np.empty(0, dtype=np.int64)
@@ -417,6 +385,59 @@ cdef inline str _render_tstamp(int64_t val):
     return str(Timestamp(val))
 
 
+cdef _get_utc_bounds(
+    ndarray vals,
+    int64_t* tdata,
+    Py_ssize_t ntrans,
+    const int64_t[::1] deltas,
+):
+    # Determine whether each date lies left of the DST transition (store in
+    # result_a) or right of the DST transition (store in result_b)
+
+    cdef:
+        ndarray result_a, result_b
+        Py_ssize_t i, n = vals.size
+        int64_t val, v_left, v_right
+        Py_ssize_t isl, isr, pos_left, pos_right
+
+    result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
+    result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
+
+    for i in range(n):
+        # This loops resembles the "Find the two best possibilities" block
+        #  in pytz's DstTZInfo.localize method.
+        result_a[i] = NPY_NAT
+        result_b[i] = NPY_NAT
+
+        val = vals[i]
+        if val == NPY_NAT:
+            continue
+
+        # TODO: be careful of overflow in val-DAY_NANOS
+        isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1
+        if isl < 0:
+            isl = 0
+
+        v_left = val - deltas[isl]
+        pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
+        # timestamp falls to the left side of the DST transition
+        if v_left + deltas[pos_left] == val:
+            result_a[i] = v_left
+
+        # TODO: be careful of overflow in val+DAY_NANOS
+        isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1
+        if isr < 0:
+            isr = 0
+
+        v_right = val - deltas[isr]
+        pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
+        # timestamp falls to the right side of the DST transition
+        if v_right + deltas[pos_right] == val:
+            result_b[i] = v_right
+
+    return result_a, result_b
+
+
 @cython.boundscheck(False)
 cdef ndarray[int64_t] _get_dst_hours(
     # vals only needed here to potential render an exception message
@@ -433,10 +454,10 @@ cdef ndarray[int64_t] _get_dst_hours(
         intp_t switch_idx
         int64_t left, right
 
-    dst_hours = np.empty(n, dtype=np.int64)
+    dst_hours = cnp.PyArray_EMPTY(result_a.ndim, result_a.shape, cnp.NPY_INT64, 0)
     dst_hours[:] = NPY_NAT
 
-    mismatch = np.zeros(n, dtype=bool)
+    mismatch = cnp.PyArray_ZEROS(result_a.ndim, result_a.shape, cnp.NPY_BOOL, 0)
 
     for i in range(n):
         left = result_a[i]
@@ -450,6 +471,7 @@ cdef ndarray[int64_t] _get_dst_hours(
     trans_idx = mismatch.nonzero()[0]
 
     if trans_idx.size == 1:
+        # TODO: not reached in tests 2022-05-02; possible?
         stamp = _render_tstamp(vals[trans_idx[0]])
         raise pytz.AmbiguousTimeError(
             f"Cannot infer dst time from {stamp} as there "
@@ -471,13 +493,15 @@ cdef ndarray[int64_t] _get_dst_hours(
 
             delta = np.diff(result_a[grp])
             if grp.size == 1 or np.all(delta > 0):
+                # TODO: not reached in tests 2022-05-02; possible?
                 stamp = _render_tstamp(vals[grp[0]])
                 raise pytz.AmbiguousTimeError(stamp)
 
             # Find the index for the switch and pull from a for dst and b
             # for standard
             switch_idxs = (delta <= 0).nonzero()[0]
             if switch_idxs.size > 1:
+                # TODO: not reached in tests 2022-05-02; possible?
                 raise pytz.AmbiguousTimeError(
                     f"There are {switch_idxs.size} dst switches when "
                     "there should only be 1."
@@ -495,15 +519,6 @@ cdef ndarray[int64_t] _get_dst_hours(
 # ----------------------------------------------------------------------
 # Timezone Conversion
 
-cdef int64_t localize_tzinfo_api(
-    int64_t utc_val, tzinfo tz, bint* fold=NULL
-) except? -1:
-    """
-    See _tz_localize_using_tzinfo_api.__doc__
-    """
-    return _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False, fold=fold)
-
-
 def py_tz_convert_from_utc_single(int64_t utc_val, tzinfo tz):
     # The 'bint* fold=NULL' in tz_convert_from_utc_single means we cannot
     #  make it cdef, so this is version exposed for testing from python.
@@ -608,7 +623,7 @@ cdef int64_t _tz_localize_using_tzinfo_api(
 # NB: relies on dateutil internals, subject to change.
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef bint infer_dateutil_fold(
+cdef bint _infer_dateutil_fold(
     int64_t value,
     const int64_t[::1] trans,
     const int64_t[::1] deltas,