Skip to content

Commit 360415c

Browse files
committed
PERF: speed up tz-aware operations by making searchsorted call in bulk,
rather than piecewise
1 parent c1a9049 commit 360415c

File tree

2 files changed

+24
-15
lines changed

2 files changed

+24
-15
lines changed

asv_bench/benchmarks/timeseries.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
class DatetimeIndex(object):
1414

15-
params = ['dst', 'repeated', 'tz_aware', 'tz_naive']
15+
params = ['dst', 'repeated', 'tz_aware', 'tz_local', 'tz_naive']
1616
param_names = ['index_type']
1717

1818
def setup(self, index_type):
@@ -26,6 +26,10 @@ def setup(self, index_type):
2626
periods=N,
2727
freq='s',
2828
tz='US/Eastern'),
29+
'tz_local': date_range(start='2000',
30+
periods=N,
31+
freq='s',
32+
tz=dateutil.tz.tzlocal()),
2933
'tz_naive': date_range(start='2000',
3034
periods=N,
3135
freq='s')}

pandas/_libs/tslibs/conversion.pyx

+19-14
Original file line numberDiff line numberDiff line change
@@ -638,34 +638,40 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
638638
"""
639639
cdef:
640640
Py_ssize_t n = len(values)
641-
Py_ssize_t i, pos
641+
Py_ssize_t i
642+
int64_t[:] pos
642643
int64_t[:] result = np.empty(n, dtype=np.int64)
643644
ndarray[int64_t] trans
644645
int64_t[:] deltas
645646
int64_t v
647+
bint tz_is_local
646648

647-
if not is_tzlocal(tz):
649+
tz_is_local = is_tzlocal(tz)
650+
651+
if not tz_is_local:
648652
# get_dst_info cannot extract offsets from tzlocal because it is
649653
# dependent on a datetime
650654
trans, deltas, _ = get_dst_info(tz)
651655
if not to_utc:
652656
# We add `offset` below instead of subtracting it
653657
deltas = -1 * np.array(deltas, dtype='i8')
654658

659+
# Previously, this search was done pointwise to try and benefit
660+
# from getting to skip searches for iNaTs. However, it seems call
661+
# overhead dominates the search time so doing it once in bulk
662+
# is substantially faster (GH#24603)
663+
pos = trans.searchsorted(values, side='right') - 1
664+
655665
for i in range(n):
656666
v = values[i]
657667
if v == NPY_NAT:
658668
result[i] = v
659-
elif is_tzlocal(tz):
669+
elif tz_is_local:
660670
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=to_utc)
661671
else:
662-
# TODO: Is it more efficient to call searchsorted pointwise or
663-
# on `values` outside the loop? We are not consistent about this.
664-
# relative efficiency of pointwise increases with number of iNaTs
665-
pos = trans.searchsorted(v, side='right') - 1
666-
if pos < 0:
672+
if pos[i] < 0:
667673
raise ValueError('First time before start of DST info')
668-
result[i] = v - deltas[pos]
674+
result[i] = v - deltas[pos[i]]
669675

670676
return result
671677

@@ -1282,9 +1288,9 @@ def is_date_array_normalized(int64_t[:] stamps, object tz=None):
12821288
is_normalized : bool True if all stamps are normalized
12831289
"""
12841290
cdef:
1285-
Py_ssize_t pos, i, n = len(stamps)
1291+
Py_ssize_t i, n = len(stamps)
12861292
ndarray[int64_t] trans
1287-
int64_t[:] deltas
1293+
int64_t[:] deltas, pos
12881294
npy_datetimestruct dts
12891295
int64_t local_val, delta
12901296
str typ
@@ -1313,11 +1319,10 @@ def is_date_array_normalized(int64_t[:] stamps, object tz=None):
13131319
return False
13141320

13151321
else:
1322+
pos = trans.searchsorted(stamps) - 1
13161323
for i in range(n):
13171324
# Adjust datetime64 timestamp, recompute datetimestruct
1318-
pos = trans.searchsorted(stamps[i]) - 1
1319-
1320-
dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
1325+
dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
13211326
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
13221327
return False
13231328

0 commit comments

Comments
 (0)