REF: separate out _get_dst_hours (#46399)

jbrockmendel · web-flow · commit 4ff2efc528c6 · 2022-03-17T19:02:14.000-04:00
diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx
@@ -115,21 +115,19 @@ timedelta-like}
     localized : ndarray[int64_t]
     """
     cdef:
-        int64_t[::1] deltas
-        ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq
+        const int64_t[::1] deltas
+        ndarray[uint8_t, cast=True] ambiguous_array
         Py_ssize_t i, isl, isr, idx, pos, ntrans, n = vals.shape[0]
         Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
         int64_t *tdata
         int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
         int64_t first_delta
         int64_t shift_delta = 0
-        ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta
-        ndarray trans_idx, grp, a_idx, b_idx, one_diff
+        ndarray[int64_t] trans, result, result_a, result_b, dst_hours
         npy_datetimestruct dts
         bint infer_dst = False, is_dst = False, fill = False
         bint shift_forward = False, shift_backward = False
         bint fill_nonexist = False
-        list trans_grp
         str stamp
 
     # Vectorized version of DstTzInfo.localize
@@ -223,49 +221,20 @@ timedelta-like}
     # silence false-positive compiler warning
     dst_hours = np.empty(0, dtype=np.int64)
     if infer_dst:
-        dst_hours = np.empty(n, dtype=np.int64)
-        dst_hours[:] = NPY_NAT
-
-        # Get the ambiguous hours (given the above, these are the hours
-        # where result_a != result_b and neither of them are NAT)
-        both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT)
-        both_eq = result_a == result_b
-        trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq)))
-        if trans_idx.size == 1:
-            stamp = _render_tstamp(vals[trans_idx])
-            raise pytz.AmbiguousTimeError(
-                f"Cannot infer dst time from {stamp} as there "
-                f"are no repeated times")
-        # Split the array into contiguous chunks (where the difference between
-        # indices is 1).  These are effectively dst transitions in different
-        # years which is useful for checking that there is not an ambiguous
-        # transition in an individual year.
-        if trans_idx.size > 0:
-            one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
-            trans_grp = np.array_split(trans_idx, one_diff)
-
-            # Iterate through each day, if there are no hours where the
-            # delta is negative (indicates a repeat of hour) the switch
-            # cannot be inferred
-            for grp in trans_grp:
-
-                delta = np.diff(result_a[grp])
-                if grp.size == 1 or np.all(delta > 0):
-                    stamp = _render_tstamp(vals[grp[0]])
-                    raise pytz.AmbiguousTimeError(stamp)
-
-                # Find the index for the switch and pull from a for dst and b
-                # for standard
-                switch_idx = (delta <= 0).nonzero()[0]
-                if switch_idx.size > 1:
-                    raise pytz.AmbiguousTimeError(
-                        f"There are {switch_idx.size} dst switches when "
-                        f"there should only be 1.")
-                switch_idx = switch_idx[0] + 1
-                # Pull the only index and adjust
-                a_idx = grp[:switch_idx]
-                b_idx = grp[switch_idx:]
-                dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))
+        dst_hours = _get_dst_hours(vals, result_a, result_b)
+
+    # Pre-compute delta_idx_offset that will be used if we go down non-existent
+    #  paths.
+    # Shift the delta_idx by 1 if the UTC offset of
+    # the target tz is greater than 0 and we're moving forward
+    # or vice versa
+    first_delta = deltas[0]
+    if (shift_forward or shift_delta > 0) and first_delta > 0:
+        delta_idx_offset = 1
+    elif (shift_backward or shift_delta < 0) and first_delta < 0:
+        delta_idx_offset = 1
+    else:
+        delta_idx_offset = 0
 
     for i in range(n):
         val = vals[i]
@@ -290,7 +259,8 @@ timedelta-like}
                     stamp = _render_tstamp(val)
                     raise pytz.AmbiguousTimeError(
                         f"Cannot infer dst time from {stamp}, try using the "
-                        f"'ambiguous' argument")
+                        "'ambiguous' argument"
+                    )
         elif left != NPY_NAT:
             result[i] = left
         elif right != NPY_NAT:
@@ -305,7 +275,7 @@ timedelta-like}
                     # time
                     if -1 < shift_delta + remaining_mins < HOUR_NANOS:
                         raise ValueError(
-                            f"The provided timedelta will relocalize on a "
+                            "The provided timedelta will relocalize on a "
                             f"nonexistent time: {nonexistent}"
                         )
                     new_local = val + shift_delta
@@ -318,16 +288,6 @@ timedelta-like}
 
                 delta_idx = bisect_right_i8(tdata, new_local, ntrans)
 
-                # Shift the delta_idx by if the UTC offset of
-                # the target tz is greater than 0 and we're moving forward
-                # or vice versa
-                first_delta = deltas[0]
-                if (shift_forward or shift_delta > 0) and first_delta > 0:
-                    delta_idx_offset = 1
-                elif (shift_backward or shift_delta < 0) and first_delta < 0:
-                    delta_idx_offset = 1
-                else:
-                    delta_idx_offset = 0
                 delta_idx = delta_idx - delta_idx_offset
                 result[i] = new_local - deltas[delta_idx]
             elif fill_nonexist:
@@ -375,6 +335,70 @@ cdef inline str _render_tstamp(int64_t val):
     return str(Timestamp(val))
 
 
+cdef ndarray[int64_t] _get_dst_hours(
+    # vals only needed here to potentially render an exception message
+    ndarray[int64_t] vals,
+    ndarray[int64_t] result_a,
+    ndarray[int64_t] result_b,
+):
+    cdef:
+        Py_ssize_t n = vals.shape[0]
+        ndarray[uint8_t, cast=True] both_nat, both_eq
+        ndarray[int64_t] delta, dst_hours
+        ndarray trans_idx, grp, a_idx, b_idx, one_diff
+        list trans_grp
+
+    dst_hours = np.empty(n, dtype=np.int64)
+    dst_hours[:] = NPY_NAT
+
+    # Get the ambiguous hours (given the above, these are the hours
+    # where result_a != result_b and neither of them are NAT)
+    both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT)
+    both_eq = result_a == result_b
+    trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq)))
+    if trans_idx.size == 1:
+        stamp = _render_tstamp(vals[trans_idx])
+        raise pytz.AmbiguousTimeError(
+            f"Cannot infer dst time from {stamp} as there "
+            "are no repeated times"
+        )
+
+    # Split the array into contiguous chunks (where the difference between
+    # indices is 1).  These are effectively dst transitions in different
+    # years which is useful for checking that there is not an ambiguous
+    # transition in an individual year.
+    if trans_idx.size > 0:
+        one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
+        trans_grp = np.array_split(trans_idx, one_diff)
+
+        # Iterate through each day; if there are no hours where the
+        # delta is non-positive (indicates a repeat of hour), the switch
+        # cannot be inferred
+        for grp in trans_grp:
+
+            delta = np.diff(result_a[grp])
+            if grp.size == 1 or np.all(delta > 0):
+                stamp = _render_tstamp(vals[grp[0]])
+                raise pytz.AmbiguousTimeError(stamp)
+
+            # Find the index for the switch and pull from a for dst and b
+            # for standard
+            switch_idx = (delta <= 0).nonzero()[0]
+            if switch_idx.size > 1:
+                raise pytz.AmbiguousTimeError(
+                    f"There are {switch_idx.size} dst switches when "
+                    "there should only be 1."
+                )
+
+            switch_idx = switch_idx[0] + 1  # TODO: declare type for switch_idx
+            # Pull the only index and adjust
+            a_idx = grp[:switch_idx]
+            b_idx = grp[switch_idx:]
+            dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))
+
+    return dst_hours
+
+
 # ----------------------------------------------------------------------
 # Timezone Conversion