@@ -638,34 +638,40 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
638
638
"""
639
639
cdef:
640
640
Py_ssize_t n = len (values)
641
- Py_ssize_t i, pos
641
+ Py_ssize_t i
642
+ int64_t[:] pos
642
643
int64_t[:] result = np.empty(n, dtype = np.int64)
643
644
ndarray[int64_t] trans
644
645
int64_t[:] deltas
645
646
int64_t v
647
+ bint tz_is_local
646
648
647
- if not is_tzlocal(tz):
649
+ tz_is_local = is_tzlocal(tz)
650
+
651
+ if not tz_is_local:
648
652
# get_dst_info cannot extract offsets from tzlocal because it is
649
653
# dependent on a datetime
650
654
trans, deltas, _ = get_dst_info(tz)
651
655
if not to_utc:
652
656
# We add `offset` below instead of subtracting it
653
657
deltas = - 1 * np.array(deltas, dtype = ' i8' )
654
658
659
+ # Previously, this search was done pointwise to try and benefit
660
+ # from getting to skip searches for iNaTs. However, it seems call
661
+ # overhead dominates the search time so doing it once in bulk
662
+ # is substantially faster (GH#24603)
663
+ pos = trans.searchsorted(values, side = ' right' ) - 1
664
+
655
665
for i in range (n):
656
666
v = values[i]
657
667
if v == NPY_NAT:
658
668
result[i] = v
659
- elif is_tzlocal(tz) :
669
+ elif tz_is_local :
660
670
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc = to_utc)
661
671
else :
662
- # TODO: Is it more efficient to call searchsorted pointwise or
663
- # on `values` outside the loop? We are not consistent about this.
664
- # relative effiency of pointwise increases with number of iNaTs
665
- pos = trans.searchsorted(v, side = ' right' ) - 1
666
- if pos < 0 :
672
+ if pos[i] < 0 :
667
673
raise ValueError (' First time before start of DST info' )
668
- result[i] = v - deltas[pos]
674
+ result[i] = v - deltas[pos[i] ]
669
675
670
676
return result
671
677
@@ -1282,9 +1288,9 @@ def is_date_array_normalized(int64_t[:] stamps, object tz=None):
1282
1288
is_normalized : bool True if all stamps are normalized
1283
1289
"""
1284
1290
cdef:
1285
- Py_ssize_t pos, i, n = len (stamps)
1291
+ Py_ssize_t i, n = len (stamps)
1286
1292
ndarray[int64_t] trans
1287
- int64_t[:] deltas
1293
+ int64_t[:] deltas, pos
1288
1294
npy_datetimestruct dts
1289
1295
int64_t local_val, delta
1290
1296
str typ
@@ -1313,11 +1319,10 @@ def is_date_array_normalized(int64_t[:] stamps, object tz=None):
1313
1319
return False
1314
1320
1315
1321
else :
1322
+ pos = trans.searchsorted(stamps) - 1
1316
1323
for i in range (n):
1317
1324
# Adjust datetime64 timestamp, recompute datetimestruct
1318
- pos = trans.searchsorted(stamps[i]) - 1
1319
-
1320
- dt64_to_dtstruct(stamps[i] + deltas[pos], & dts)
1325
+ dt64_to_dtstruct(stamps[i] + deltas[pos[i]], & dts)
1321
1326
if (dts.hour + dts.min + dts.sec + dts.us) > 0 :
1322
1327
return False
1323
1328
0 commit comments