Skip to content

standardize post-call treatment of get_dst_info, delay sorting calls #21960

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 39 additions & 30 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
ndarray[int64_t] trans, deltas
npy_datetimestruct dts
object dt
int64_t value
int64_t value, delta
ndarray[object] result = np.empty(n, dtype=object)
object (*func_create)(int64_t, npy_datetimestruct, object, object)

Expand All @@ -125,58 +125,67 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
raise ValueError("box must be one of 'datetime', 'date', 'time' or"
" 'timestamp'")

if tz is not None:
if is_utc(tz):
if is_utc(tz) or tz is None:
for i in range(n):
value = arr[i]
if value == NPY_NAT:
result[i] = NaT
else:
dt64_to_dtstruct(value, &dts)
result[i] = func_create(value, dts, tz, freq)
elif is_tzlocal(tz):
for i in range(n):
value = arr[i]
if value == NPY_NAT:
result[i] = NaT
else:
# Python datetime objects do not support nanosecond
# resolution (yet, PEP 564). Need to compute new value
# using the i8 representation.
local_value = tz_convert_utc_to_tzlocal(value, tz)
dt64_to_dtstruct(local_value, &dts)
result[i] = func_create(value, dts, tz, freq)
else:
trans, deltas, typ = get_dst_info(tz)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(deltas) == 1
delta = deltas[0]
for i in range(n):
value = arr[i]
if value == NPY_NAT:
result[i] = NaT
else:
dt64_to_dtstruct(value, &dts)
# Adjust datetime64 timestamp, recompute datetimestruct
dt64_to_dtstruct(value + delta, &dts)
result[i] = func_create(value, dts, tz, freq)
elif is_tzlocal(tz) or is_fixed_offset(tz):

elif typ == 'dateutil':
# no zone-name change for dateutil tzs - dst etc
# represented in single object.
for i in range(n):
value = arr[i]
if value == NPY_NAT:
result[i] = NaT
else:
# Python datetime objects do not support nanosecond
# resolution (yet, PEP 564). Need to compute new value
# using the i8 representation.
local_value = tz_convert_utc_to_tzlocal(value, tz)
dt64_to_dtstruct(local_value, &dts)
# Adjust datetime64 timestamp, recompute datetimestruct
pos = trans.searchsorted(value, side='right') - 1
dt64_to_dtstruct(value + deltas[pos], &dts)
result[i] = func_create(value, dts, tz, freq)
else:
trans, deltas, typ = get_dst_info(tz)

# pytz
for i in range(n):

value = arr[i]
if value == NPY_NAT:
result[i] = NaT
else:

# Adjust datetime64 timestamp, recompute datetimestruct
pos = trans.searchsorted(value, side='right') - 1
if treat_tz_as_pytz(tz):
# find right representation of dst etc in pytz timezone
new_tz = tz._tzinfos[tz._transition_info[pos]]
else:
# no zone-name change for dateutil tzs - dst etc
# represented in single object.
new_tz = tz
# find right representation of dst etc in pytz timezone
new_tz = tz._tzinfos[tz._transition_info[pos]]

dt64_to_dtstruct(value + deltas[pos], &dts)
result[i] = func_create(value, dts, new_tz, freq)
else:
for i in range(n):

value = arr[i]
if value == NPY_NAT:
result[i] = NaT
else:
dt64_to_dtstruct(value, &dts)
result[i] = func_create(value, dts, None, freq)

return result

Expand Down
56 changes: 34 additions & 22 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
"""
cdef:
ndarray[int64_t] trans, deltas
int64_t delta, local_val
int64_t local_val
Py_ssize_t pos

assert obj.tzinfo is None
Expand All @@ -542,22 +542,23 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)

pos = trans.searchsorted(obj.value, side='right') - 1

# static/pytz/dateutil specific code
if is_fixed_offset(tz):
# statictzinfo
assert len(deltas) == 1, len(deltas)
# static/fixed tzinfo; in this case we know len(deltas) == 1
# This can come back with `typ` of either "fixed" or None
dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
elif treat_tz_as_pytz(tz):
elif typ == 'pytz':
# i.e. treat_tz_as_pytz(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
tz = tz._tzinfos[tz._transition_info[pos]]
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
elif treat_tz_as_dateutil(tz):
elif typ == 'dateutil':
# i.e. treat_tz_as_dateutil(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
else:
# TODO: this case is never reached in the tests, but get_dst_info
# has a path that returns typ = None and empty deltas.
# --> Is this path possible?
# Note: as of 2018-07-17 all tzinfo objects that are _not_
# either pytz or dateutil have is_fixed_offset(tz) == True,
# so this branch will never be reached.
pass

obj.tzinfo = tz
Expand Down Expand Up @@ -1126,6 +1127,7 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
ndarray[int64_t] trans, deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t delta

if is_utc(tz):
with nogil:
Expand All @@ -1147,17 +1149,17 @@ cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)

pos = trans.searchsorted(stamps, side='right') - 1

# statictzinfo
if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(deltas) == 1
delta = deltas[0]
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
dt64_to_dtstruct(stamps[i] + delta, &dts)
result[i] = _normalized_stamp(&dts)
else:
pos = trans.searchsorted(stamps, side='right') - 1
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
Expand Down Expand Up @@ -1207,7 +1209,7 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans, deltas
npy_datetimestruct dts
int64_t local_val
int64_t local_val, delta

if tz is None or is_utc(tz):
for i in range(n):
Expand All @@ -1223,12 +1225,22 @@ def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
else:
trans, deltas, typ = get_dst_info(tz)

for i in range(n):
# Adjust datetime64 timestamp, recompute datetimestruct
pos = trans.searchsorted(stamps[i]) - 1
if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(deltas) == 1
delta = deltas[0]
for i in range(n):
# Adjust datetime64 timestamp, recompute datetimestruct
dt64_to_dtstruct(stamps[i] + delta, &dts)
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
return False

dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
return False
else:
for i in range(n):
# Adjust datetime64 timestamp, recompute datetimestruct
pos = trans.searchsorted(stamps[i]) - 1

dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
return False

return True
21 changes: 11 additions & 10 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -938,13 +938,14 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
npy_datetimestruct dts
int64_t local_val

if is_utc(tz):
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
dt64_to_dtstruct(stamps[i], &dts)
result[i] = get_period_ordinal(&dts, freq)
if is_utc(tz) or tz is None:
with nogil:
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
dt64_to_dtstruct(stamps[i], &dts)
result[i] = get_period_ordinal(&dts, freq)

elif is_tzlocal(tz):
for i in range(n):
Expand All @@ -958,17 +959,17 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)

pos = trans.searchsorted(stamps, side='right') - 1

# statictzinfo
if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(deltas) == 1
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
result[i] = get_period_ordinal(&dts, freq)
else:
pos = trans.searchsorted(stamps, side='right') - 1

for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
Expand Down
25 changes: 8 additions & 17 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -58,28 +58,19 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None):

if tz is not None:
tz = maybe_get_tz(tz)
return _reso_local(stamps, tz)
else:
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i], &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
return reso
return _reso_local(stamps, tz)


cdef _reso_local(ndarray[int64_t] stamps, object tz):
cdef:
Py_ssize_t n = len(stamps)
Py_ssize_t i, n = len(stamps)
int reso = RESO_DAY, curr_reso
ndarray[int64_t] trans, deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t local_val
int64_t local_val, delta

if is_utc(tz):
if is_utc(tz) or tz is None:
for i in range(n):
if stamps[i] == NPY_NAT:
continue
Expand All @@ -100,18 +91,18 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz):
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)

pos = trans.searchsorted(stamps, side='right') - 1

# statictzinfo
if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(deltas) == 1
delta = deltas[0]
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
dt64_to_dtstruct(stamps[i] + delta, &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
else:
pos = trans.searchsorted(stamps, side='right') - 1
for i in range(n):
if stamps[i] == NPY_NAT:
continue
Expand Down
12 changes: 9 additions & 3 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,18 @@ cdef object get_dst_info(object tz):
dtype='i8') * 1000000000
typ = 'fixed'
else:
trans = np.array([], dtype='M8[ns]')
deltas = np.array([], dtype='i8')
typ = None
# 2018-07-12 this is not reached in the tests, and this case
# is not handled in any of the functions that call
# get_dst_info. If this case _were_ hit the calling
# functions would then hit an IndexError because they assume
# `deltas` is non-empty.
# (under the just-deleted code that returned empty arrays)
raise AssertionError("dateutil tzinfo is not a FixedOffset "
"and has an empty `_trans_list`.", tz)

else:
# static tzinfo
# TODO: This case is not hit in tests (2018-07-17); is it possible?
trans = np.array([NPY_NAT + 1], dtype=np.int64)
num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000
deltas = np.array([num], dtype=np.int64)
Expand Down