Skip to content

Commit 652a3de

Browse files
jbrockmendel authored and jreback committed
standardize post-call treatment of get_dst_info, delay sorting calls (#21960)
1 parent 27ebb3e commit 652a3de

File tree

5 files changed

+101
-82
lines changed

5 files changed

+101
-82
lines changed

pandas/_libs/tslib.pyx

+39-30
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
104104
ndarray[int64_t] trans, deltas
105105
npy_datetimestruct dts
106106
object dt
107-
int64_t value
107+
int64_t value, delta
108108
ndarray[object] result = np.empty(n, dtype=object)
109109
object (*func_create)(int64_t, npy_datetimestruct, object, object)
110110

@@ -125,58 +125,67 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
125125
raise ValueError("box must be one of 'datetime', 'date', 'time' or"
126126
" 'timestamp'")
127127

128-
if tz is not None:
129-
if is_utc(tz):
128+
if is_utc(tz) or tz is None:
129+
for i in range(n):
130+
value = arr[i]
131+
if value == NPY_NAT:
132+
result[i] = NaT
133+
else:
134+
dt64_to_dtstruct(value, &dts)
135+
result[i] = func_create(value, dts, tz, freq)
136+
elif is_tzlocal(tz):
137+
for i in range(n):
138+
value = arr[i]
139+
if value == NPY_NAT:
140+
result[i] = NaT
141+
else:
142+
# Python datetime objects do not support nanosecond
143+
# resolution (yet, PEP 564). Need to compute new value
144+
# using the i8 representation.
145+
local_value = tz_convert_utc_to_tzlocal(value, tz)
146+
dt64_to_dtstruct(local_value, &dts)
147+
result[i] = func_create(value, dts, tz, freq)
148+
else:
149+
trans, deltas, typ = get_dst_info(tz)
150+
151+
if typ not in ['pytz', 'dateutil']:
152+
# static/fixed; in this case we know that len(deltas) == 1
153+
delta = deltas[0]
130154
for i in range(n):
131155
value = arr[i]
132156
if value == NPY_NAT:
133157
result[i] = NaT
134158
else:
135-
dt64_to_dtstruct(value, &dts)
159+
# Adjust datetime64 timestamp, recompute datetimestruct
160+
dt64_to_dtstruct(value + delta, &dts)
136161
result[i] = func_create(value, dts, tz, freq)
137-
elif is_tzlocal(tz) or is_fixed_offset(tz):
162+
163+
elif typ == 'dateutil':
164+
# no zone-name change for dateutil tzs - dst etc
165+
# represented in single object.
138166
for i in range(n):
139167
value = arr[i]
140168
if value == NPY_NAT:
141169
result[i] = NaT
142170
else:
143-
# Python datetime objects do not support nanosecond
144-
# resolution (yet, PEP 564). Need to compute new value
145-
# using the i8 representation.
146-
local_value = tz_convert_utc_to_tzlocal(value, tz)
147-
dt64_to_dtstruct(local_value, &dts)
171+
# Adjust datetime64 timestamp, recompute datetimestruct
172+
pos = trans.searchsorted(value, side='right') - 1
173+
dt64_to_dtstruct(value + deltas[pos], &dts)
148174
result[i] = func_create(value, dts, tz, freq)
149175
else:
150-
trans, deltas, typ = get_dst_info(tz)
151-
176+
# pytz
152177
for i in range(n):
153-
154178
value = arr[i]
155179
if value == NPY_NAT:
156180
result[i] = NaT
157181
else:
158-
159182
# Adjust datetime64 timestamp, recompute datetimestruct
160183
pos = trans.searchsorted(value, side='right') - 1
161-
if treat_tz_as_pytz(tz):
162-
# find right representation of dst etc in pytz timezone
163-
new_tz = tz._tzinfos[tz._transition_info[pos]]
164-
else:
165-
# no zone-name change for dateutil tzs - dst etc
166-
# represented in single object.
167-
new_tz = tz
184+
# find right representation of dst etc in pytz timezone
185+
new_tz = tz._tzinfos[tz._transition_info[pos]]
168186

169187
dt64_to_dtstruct(value + deltas[pos], &dts)
170188
result[i] = func_create(value, dts, new_tz, freq)
171-
else:
172-
for i in range(n):
173-
174-
value = arr[i]
175-
if value == NPY_NAT:
176-
result[i] = NaT
177-
else:
178-
dt64_to_dtstruct(value, &dts)
179-
result[i] = func_create(value, dts, None, freq)
180189

181190
return result
182191

pandas/_libs/tslibs/conversion.pyx

+34-22
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
526526
"""
527527
cdef:
528528
ndarray[int64_t] trans, deltas
529-
int64_t delta, local_val
529+
int64_t local_val
530530
Py_ssize_t pos
531531

532532
assert obj.tzinfo is None
@@ -542,22 +542,23 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
542542
# Adjust datetime64 timestamp, recompute datetimestruct
543543
trans, deltas, typ = get_dst_info(tz)
544544

545-
pos = trans.searchsorted(obj.value, side='right') - 1
546-
547-
# static/pytz/dateutil specific code
548545
if is_fixed_offset(tz):
549-
# statictzinfo
550-
assert len(deltas) == 1, len(deltas)
546+
# static/fixed tzinfo; in this case we know len(deltas) == 1
547+
# This can come back with `typ` of either "fixed" or None
551548
dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
552-
elif treat_tz_as_pytz(tz):
549+
elif typ == 'pytz':
550+
# i.e. treat_tz_as_pytz(tz)
551+
pos = trans.searchsorted(obj.value, side='right') - 1
553552
tz = tz._tzinfos[tz._transition_info[pos]]
554553
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
555-
elif treat_tz_as_dateutil(tz):
554+
elif typ == 'dateutil':
555+
# i.e. treat_tz_as_dateutil(tz)
556+
pos = trans.searchsorted(obj.value, side='right') - 1
556557
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
557558
else:
558-
# TODO: this case is never reached in the tests, but get_dst_info
559-
# has a path that returns typ = None and empty deltas.
560-
# --> Is this path possible?
559+
# Note: as of 2018-07-17 all tzinfo objects that are _not_
560+
# either pytz or dateutil have is_fixed_offset(tz) == True,
561+
# so this branch will never be reached.
561562
pass
562563

563564
obj.tzinfo = tz
@@ -1126,6 +1127,7 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
11261127
ndarray[int64_t] trans, deltas
11271128
Py_ssize_t[:] pos
11281129
npy_datetimestruct dts
1130+
int64_t delta
11291131

11301132
if is_utc(tz):
11311133
with nogil:
@@ -1147,17 +1149,17 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
11471149
# Adjust datetime64 timestamp, recompute datetimestruct
11481150
trans, deltas, typ = get_dst_info(tz)
11491151

1150-
pos = trans.searchsorted(stamps, side='right') - 1
1151-
1152-
# statictzinfo
11531152
if typ not in ['pytz', 'dateutil']:
1153+
# static/fixed; in this case we know that len(deltas) == 1
1154+
delta = deltas[0]
11541155
for i in range(n):
11551156
if stamps[i] == NPY_NAT:
11561157
result[i] = NPY_NAT
11571158
continue
1158-
dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
1159+
dt64_to_dtstruct(stamps[i] + delta, &dts)
11591160
result[i] = _normalized_stamp(&dts)
11601161
else:
1162+
pos = trans.searchsorted(stamps, side='right') - 1
11611163
for i in range(n):
11621164
if stamps[i] == NPY_NAT:
11631165
result[i] = NPY_NAT
@@ -1207,7 +1209,7 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
12071209
Py_ssize_t i, n = len(stamps)
12081210
ndarray[int64_t] trans, deltas
12091211
npy_datetimestruct dts
1210-
int64_t local_val
1212+
int64_t local_val, delta
12111213

12121214
if tz is None or is_utc(tz):
12131215
for i in range(n):
@@ -1223,12 +1225,22 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
12231225
else:
12241226
trans, deltas, typ = get_dst_info(tz)
12251227

1226-
for i in range(n):
1227-
# Adjust datetime64 timestamp, recompute datetimestruct
1228-
pos = trans.searchsorted(stamps[i]) - 1
1228+
if typ not in ['pytz', 'dateutil']:
1229+
# static/fixed; in this case we know that len(deltas) == 1
1230+
delta = deltas[0]
1231+
for i in range(n):
1232+
# Adjust datetime64 timestamp, recompute datetimestruct
1233+
dt64_to_dtstruct(stamps[i] + delta, &dts)
1234+
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
1235+
return False
12291236

1230-
dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
1231-
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
1232-
return False
1237+
else:
1238+
for i in range(n):
1239+
# Adjust datetime64 timestamp, recompute datetimestruct
1240+
pos = trans.searchsorted(stamps[i]) - 1
1241+
1242+
dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
1243+
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
1244+
return False
12331245

12341246
return True

pandas/_libs/tslibs/period.pyx

+11-10
Original file line numberDiff line numberDiff line change
@@ -938,13 +938,14 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
938938
npy_datetimestruct dts
939939
int64_t local_val
940940

941-
if is_utc(tz):
942-
for i in range(n):
943-
if stamps[i] == NPY_NAT:
944-
result[i] = NPY_NAT
945-
continue
946-
dt64_to_dtstruct(stamps[i], &dts)
947-
result[i] = get_period_ordinal(&dts, freq)
941+
if is_utc(tz) or tz is None:
942+
with nogil:
943+
for i in range(n):
944+
if stamps[i] == NPY_NAT:
945+
result[i] = NPY_NAT
946+
continue
947+
dt64_to_dtstruct(stamps[i], &dts)
948+
result[i] = get_period_ordinal(&dts, freq)
948949

949950
elif is_tzlocal(tz):
950951
for i in range(n):
@@ -958,17 +959,17 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
958959
# Adjust datetime64 timestamp, recompute datetimestruct
959960
trans, deltas, typ = get_dst_info(tz)
960961

961-
pos = trans.searchsorted(stamps, side='right') - 1
962-
963-
# statictzinfo
964962
if typ not in ['pytz', 'dateutil']:
963+
# static/fixed; in this case we know that len(deltas) == 1
965964
for i in range(n):
966965
if stamps[i] == NPY_NAT:
967966
result[i] = NPY_NAT
968967
continue
969968
dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
970969
result[i] = get_period_ordinal(&dts, freq)
971970
else:
971+
pos = trans.searchsorted(stamps, side='right') - 1
972+
972973
for i in range(n):
973974
if stamps[i] == NPY_NAT:
974975
result[i] = NPY_NAT

pandas/_libs/tslibs/resolution.pyx

+8-17
Original file line numberDiff line numberDiff line change
@@ -58,28 +58,19 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None):
5858

5959
if tz is not None:
6060
tz = maybe_get_tz(tz)
61-
return _reso_local(stamps, tz)
62-
else:
63-
for i in range(n):
64-
if stamps[i] == NPY_NAT:
65-
continue
66-
dt64_to_dtstruct(stamps[i], &dts)
67-
curr_reso = _reso_stamp(&dts)
68-
if curr_reso < reso:
69-
reso = curr_reso
70-
return reso
61+
return _reso_local(stamps, tz)
7162

7263

7364
cdef _reso_local(ndarray[int64_t] stamps, object tz):
7465
cdef:
75-
Py_ssize_t n = len(stamps)
66+
Py_ssize_t i, n = len(stamps)
7667
int reso = RESO_DAY, curr_reso
7768
ndarray[int64_t] trans, deltas
7869
Py_ssize_t[:] pos
7970
npy_datetimestruct dts
80-
int64_t local_val
71+
int64_t local_val, delta
8172

82-
if is_utc(tz):
73+
if is_utc(tz) or tz is None:
8374
for i in range(n):
8475
if stamps[i] == NPY_NAT:
8576
continue
@@ -100,18 +91,18 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz):
10091
# Adjust datetime64 timestamp, recompute datetimestruct
10192
trans, deltas, typ = get_dst_info(tz)
10293

103-
pos = trans.searchsorted(stamps, side='right') - 1
104-
105-
# statictzinfo
10694
if typ not in ['pytz', 'dateutil']:
95+
# static/fixed; in this case we know that len(deltas) == 1
96+
delta = deltas[0]
10797
for i in range(n):
10898
if stamps[i] == NPY_NAT:
10999
continue
110-
dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
100+
dt64_to_dtstruct(stamps[i] + delta, &dts)
111101
curr_reso = _reso_stamp(&dts)
112102
if curr_reso < reso:
113103
reso = curr_reso
114104
else:
105+
pos = trans.searchsorted(stamps, side='right') - 1
115106
for i in range(n):
116107
if stamps[i] == NPY_NAT:
117108
continue

pandas/_libs/tslibs/timezones.pyx

+9-3
Original file line numberDiff line numberDiff line change
@@ -258,12 +258,18 @@ cdef object get_dst_info(object tz):
258258
dtype='i8') * 1000000000
259259
typ = 'fixed'
260260
else:
261-
trans = np.array([], dtype='M8[ns]')
262-
deltas = np.array([], dtype='i8')
263-
typ = None
261+
# 2018-07-12 this is not reached in the tests, and this case
262+
# is not handled in any of the functions that call
263+
# get_dst_info. If this case _were_ hit the calling
264+
# functions would then hit an IndexError because they assume
265+
# `deltas` is non-empty.
266+
# (under the just-deleted code that returned empty arrays)
267+
raise AssertionError("dateutil tzinfo is not a FixedOffset "
268+
"and has an empty `_trans_list`.", tz)
264269

265270
else:
266271
# static tzinfo
272+
# TODO: This case is not hit in tests (2018-07-17); is it possible?
267273
trans = np.array([NPY_NAT + 1], dtype=np.int64)
268274
num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000
269275
deltas = np.array([num], dtype=np.int64)

0 commit comments

Comments
 (0)