Skip to content

Commit 4ff2efc

Browse files
authored
REF: separate out _get_dst_hours (#46399)
1 parent 2dfe3a9 commit 4ff2efc

File tree

1 file changed

+84
-60
lines changed

1 file changed

+84
-60
lines changed

pandas/_libs/tslibs/tzconversion.pyx

+84-60
Original file line numberDiff line numberDiff line change
@@ -115,21 +115,19 @@ timedelta-like}
115115
localized : ndarray[int64_t]
116116
"""
117117
cdef:
118-
int64_t[::1] deltas
119-
ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq
118+
const int64_t[::1] deltas
119+
ndarray[uint8_t, cast=True] ambiguous_array
120120
Py_ssize_t i, isl, isr, idx, pos, ntrans, n = vals.shape[0]
121121
Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
122122
int64_t *tdata
123123
int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
124124
int64_t first_delta
125125
int64_t shift_delta = 0
126-
ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta
127-
ndarray trans_idx, grp, a_idx, b_idx, one_diff
126+
ndarray[int64_t] trans, result, result_a, result_b, dst_hours
128127
npy_datetimestruct dts
129128
bint infer_dst = False, is_dst = False, fill = False
130129
bint shift_forward = False, shift_backward = False
131130
bint fill_nonexist = False
132-
list trans_grp
133131
str stamp
134132

135133
# Vectorized version of DstTzInfo.localize
@@ -223,49 +221,20 @@ timedelta-like}
223221
# silence false-positive compiler warning
224222
dst_hours = np.empty(0, dtype=np.int64)
225223
if infer_dst:
226-
dst_hours = np.empty(n, dtype=np.int64)
227-
dst_hours[:] = NPY_NAT
228-
229-
# Get the ambiguous hours (given the above, these are the hours
230-
# where result_a != result_b and neither of them are NAT)
231-
both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT)
232-
both_eq = result_a == result_b
233-
trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq)))
234-
if trans_idx.size == 1:
235-
stamp = _render_tstamp(vals[trans_idx])
236-
raise pytz.AmbiguousTimeError(
237-
f"Cannot infer dst time from {stamp} as there "
238-
f"are no repeated times")
239-
# Split the array into contiguous chunks (where the difference between
240-
# indices is 1). These are effectively dst transitions in different
241-
# years which is useful for checking that there is not an ambiguous
242-
# transition in an individual year.
243-
if trans_idx.size > 0:
244-
one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
245-
trans_grp = np.array_split(trans_idx, one_diff)
246-
247-
# Iterate through each day, if there are no hours where the
248-
# delta is negative (indicates a repeat of hour) the switch
249-
# cannot be inferred
250-
for grp in trans_grp:
251-
252-
delta = np.diff(result_a[grp])
253-
if grp.size == 1 or np.all(delta > 0):
254-
stamp = _render_tstamp(vals[grp[0]])
255-
raise pytz.AmbiguousTimeError(stamp)
256-
257-
# Find the index for the switch and pull from a for dst and b
258-
# for standard
259-
switch_idx = (delta <= 0).nonzero()[0]
260-
if switch_idx.size > 1:
261-
raise pytz.AmbiguousTimeError(
262-
f"There are {switch_idx.size} dst switches when "
263-
f"there should only be 1.")
264-
switch_idx = switch_idx[0] + 1
265-
# Pull the only index and adjust
266-
a_idx = grp[:switch_idx]
267-
b_idx = grp[switch_idx:]
268-
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))
224+
dst_hours = _get_dst_hours(vals, result_a, result_b)
225+
226+
# Pre-compute delta_idx_offset that will be used if we go down non-existent
227+
# paths.
228+
# Shift the delta_idx by 1 if the UTC offset of
229+
# the target tz is greater than 0 and we're moving forward
230+
# or vice versa
231+
first_delta = deltas[0]
232+
if (shift_forward or shift_delta > 0) and first_delta > 0:
233+
delta_idx_offset = 1
234+
elif (shift_backward or shift_delta < 0) and first_delta < 0:
235+
delta_idx_offset = 1
236+
else:
237+
delta_idx_offset = 0
269238

270239
for i in range(n):
271240
val = vals[i]
@@ -290,7 +259,8 @@ timedelta-like}
290259
stamp = _render_tstamp(val)
291260
raise pytz.AmbiguousTimeError(
292261
f"Cannot infer dst time from {stamp}, try using the "
293-
f"'ambiguous' argument")
262+
"'ambiguous' argument"
263+
)
294264
elif left != NPY_NAT:
295265
result[i] = left
296266
elif right != NPY_NAT:
@@ -305,7 +275,7 @@ timedelta-like}
305275
# time
306276
if -1 < shift_delta + remaining_mins < HOUR_NANOS:
307277
raise ValueError(
308-
f"The provided timedelta will relocalize on a "
278+
"The provided timedelta will relocalize on a "
309279
f"nonexistent time: {nonexistent}"
310280
)
311281
new_local = val + shift_delta
@@ -318,16 +288,6 @@ timedelta-like}
318288

319289
delta_idx = bisect_right_i8(tdata, new_local, ntrans)
320290

321-
# Shift the delta_idx by if the UTC offset of
322-
# the target tz is greater than 0 and we're moving forward
323-
# or vice versa
324-
first_delta = deltas[0]
325-
if (shift_forward or shift_delta > 0) and first_delta > 0:
326-
delta_idx_offset = 1
327-
elif (shift_backward or shift_delta < 0) and first_delta < 0:
328-
delta_idx_offset = 1
329-
else:
330-
delta_idx_offset = 0
331291
delta_idx = delta_idx - delta_idx_offset
332292
result[i] = new_local - deltas[delta_idx]
333293
elif fill_nonexist:
@@ -375,6 +335,70 @@ cdef inline str _render_tstamp(int64_t val):
375335
return str(Timestamp(val))
376336

377337

338+
cdef ndarray[int64_t] _get_dst_hours(
339+
# vals only needed here to potentially render an exception message
340+
ndarray[int64_t] vals,
341+
ndarray[int64_t] result_a,
342+
ndarray[int64_t] result_b,
343+
):
344+
cdef:
345+
Py_ssize_t n = vals.shape[0]
346+
ndarray[uint8_t, cast=True] both_nat, both_eq
347+
ndarray[int64_t] delta, dst_hours
348+
ndarray trans_idx, grp, a_idx, b_idx, one_diff
349+
list trans_grp
350+
351+
dst_hours = np.empty(n, dtype=np.int64)
352+
dst_hours[:] = NPY_NAT
353+
354+
# Get the ambiguous hours (given the above, these are the hours
355+
# where result_a != result_b and neither of them is NAT)
356+
both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT)
357+
both_eq = result_a == result_b
358+
trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq)))
359+
if trans_idx.size == 1:
360+
stamp = _render_tstamp(vals[trans_idx])
361+
raise pytz.AmbiguousTimeError(
362+
f"Cannot infer dst time from {stamp} as there "
363+
"are no repeated times"
364+
)
365+
366+
# Split the array into contiguous chunks (where the difference between
367+
# indices is 1). These are effectively dst transitions in different
368+
# years which is useful for checking that there is not an ambiguous
369+
# transition in an individual year.
370+
if trans_idx.size > 0:
371+
one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
372+
trans_grp = np.array_split(trans_idx, one_diff)
373+
374+
# Iterate through each day, if there are no hours where the
375+
# delta is non-positive (indicates a repeat of hour) the switch
376+
# cannot be inferred
377+
for grp in trans_grp:
378+
379+
delta = np.diff(result_a[grp])
380+
if grp.size == 1 or np.all(delta > 0):
381+
stamp = _render_tstamp(vals[grp[0]])
382+
raise pytz.AmbiguousTimeError(stamp)
383+
384+
# Find the index for the switch and pull from a for dst and b
385+
# for standard
386+
switch_idx = (delta <= 0).nonzero()[0]
387+
if switch_idx.size > 1:
388+
raise pytz.AmbiguousTimeError(
389+
f"There are {switch_idx.size} dst switches when "
390+
"there should only be 1."
391+
)
392+
393+
switch_idx = switch_idx[0] + 1 # TODO: declare type for switch_idx
394+
# Pull the only index and adjust
395+
a_idx = grp[:switch_idx]
396+
b_idx = grp[switch_idx:]
397+
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))
398+
399+
return dst_hours
400+
401+
378402
# ----------------------------------------------------------------------
379403
# Timezone Conversion
380404

0 commit comments

Comments
 (0)