Skip to content

REF: use standard patterns in tslibs #46259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Mar 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct


cdef class _TSObject:
cdef:
cdef readonly:
npy_datetimestruct dts # npy_datetimestruct
int64_t value # numpy dt64
object tzinfo
tzinfo tzinfo
bint fold


Expand Down
40 changes: 18 additions & 22 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -370,18 +370,13 @@ cdef class _TSObject:
# cdef:
# npy_datetimestruct dts # npy_datetimestruct
# int64_t value # numpy dt64
# object tzinfo
# tzinfo tzinfo
# bint fold

def __cinit__(self):
# GH 25057. As per PEP 495, set fold to 0 by default
self.fold = 0

@property
def value(self):
# This is needed in order for `value` to be accessible in lib.pyx
return self.value


cdef convert_to_tsobject(object ts, tzinfo tz, str unit,
bint dayfirst, bint yearfirst, int32_t nanos=0):
Expand Down Expand Up @@ -541,7 +536,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
int64_t value # numpy dt64
datetime dt
ndarray[int64_t] trans
int64_t[:] deltas
int64_t[::1] deltas

value = dtstruct_to_dt64(&dts)
obj.dts = dts
Expand Down Expand Up @@ -711,7 +706,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
"""
cdef:
ndarray[int64_t] trans
int64_t[:] deltas
int64_t[::1] deltas
int64_t local_val
Py_ssize_t pos
str typ
Expand All @@ -729,26 +724,27 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz):
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)

if is_fixed_offset(tz):
# static/fixed tzinfo; in this case we know len(deltas) == 1
# This can come back with `typ` of either "fixed" or None
dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
elif typ == 'pytz':
if typ == "pytz":
# i.e. treat_tz_as_pytz(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
pos = trans.searchsorted(obj.value, side="right") - 1
local_val = obj.value + deltas[pos]

# find right representation of dst etc in pytz timezone
tz = tz._tzinfos[tz._transition_info[pos]]
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
elif typ == 'dateutil':
elif typ == "dateutil":
# i.e. treat_tz_as_dateutil(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
pos = trans.searchsorted(obj.value, side="right") - 1
local_val = obj.value + deltas[pos]

# dateutil supports fold, so we infer fold from value
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
else:
# Note: as of 2018-07-17 all tzinfo objects that are _not_
# either pytz or dateutil have is_fixed_offset(tz) == True,
# so this branch will never be reached.
pass
# All other cases have len(deltas) == 1. As of 2018-07-17
# (and 2022-03-07), all test cases that get here have
# is_fixed_offset(tz).
local_val = obj.value + deltas[0]

dt64_to_dtstruct(local_val, &obj.dts)

obj.tzinfo = tz

Expand Down
74 changes: 36 additions & 38 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ cdef int64_t tz_localize_to_utc_single(
"""See tz_localize_to_utc.__doc__"""
cdef:
int64_t delta
int64_t[:] deltas
int64_t[::1] deltas

if val == NPY_NAT:
return val
Expand Down Expand Up @@ -115,9 +115,10 @@ timedelta-like}
localized : ndarray[int64_t]
"""
cdef:
int64_t[:] deltas, idx_shifted, idx_shifted_left, idx_shifted_right
int64_t[::1] deltas
int64_t[:] idx_shifted, idx_shifted_left, idx_shifted_right
ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
Py_ssize_t i, idx, pos, ntrans, n = vals.shape[0]
Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
int64_t *tdata
int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
Expand Down Expand Up @@ -184,7 +185,7 @@ timedelta-like}
trans, deltas, _ = get_dst_info(tz)

tdata = <int64_t*>cnp.PyArray_DATA(trans)
ntrans = len(trans)
ntrans = trans.shape[0]

# Determine whether each date lies left of the DST transition (store in
# result_a) or right of the DST transition (store in result_b)
Expand Down Expand Up @@ -400,7 +401,7 @@ cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz):
"""
cdef:
int64_t delta
int64_t[:] deltas
int64_t[::1] deltas
ndarray[int64_t, ndim=1] trans
intp_t pos

Expand Down Expand Up @@ -437,11 +438,11 @@ def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
cdef:
const int64_t[:] converted

if len(vals) == 0:
if vals.shape[0] == 0:
return np.array([], dtype=np.int64)

converted = _tz_convert_from_utc(vals, tz)
return np.array(converted, dtype=np.int64)
return np.asarray(converted, dtype=np.int64)


@cython.boundscheck(False)
Expand All @@ -460,53 +461,48 @@ cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
converted : ndarray[int64_t]
"""
cdef:
int64_t[:] converted, deltas
Py_ssize_t i, n = len(vals)
int64_t val, delta
int64_t[::1] converted, deltas
Py_ssize_t i, n = vals.shape[0]
int64_t val, delta = 0 # avoid not-initialized-warning
intp_t[:] pos
ndarray[int64_t] trans
str typ
bint use_tzlocal = False, use_fixed = False, use_utc = True

if is_utc(tz):
return vals
# Much faster than going through the "standard" pattern below
return vals.copy()

if is_utc(tz) or tz is None:
use_utc = True
elif is_tzlocal(tz):
converted = np.empty(n, dtype=np.int64)
for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = NPY_NAT
else:
converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False)
use_tzlocal = True
else:
converted = np.empty(n, dtype=np.int64)

trans, deltas, typ = get_dst_info(tz)

if typ not in ["pytz", "dateutil"]:
# FixedOffset, we know len(deltas) == 1
delta = deltas[0]

for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = val
else:
converted[i] = val + delta

use_fixed = True
else:
pos = trans.searchsorted(vals, side="right") - 1

for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = val
else:
if pos[i] < 0:
# TODO: How is this reached? Should we be checking for
# it elsewhere?
raise ValueError("First time before start of DST info")
converted = np.empty(n, dtype=np.int64)

converted[i] = val + deltas[pos[i]]
for i in range(n):
val = vals[i]
if val == NPY_NAT:
converted[i] = NPY_NAT
continue

# The pattern used in vectorized.pyx checks for use_utc here,
# but we handle that case above.
if use_tzlocal:
converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc=False)
elif use_fixed:
converted[i] = val + delta
else:
converted[i] = val + deltas[pos[i]]

return converted

Expand Down Expand Up @@ -547,8 +543,10 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True,
timedelta td

dt64_to_dtstruct(val, &dts)

dt = datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us)

# tz.utcoffset only makes sense if datetime
# is _wall time_, so if val is a UTC timestamp convert to wall time
if not to_utc:
Expand Down
45 changes: 21 additions & 24 deletions pandas/_libs/tslibs/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def ints_to_pydatetime(
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans
int64_t[:] deltas
int64_t[::1] deltas
intp_t[:] pos
npy_datetimestruct dts
object dt, new_tz
Expand Down Expand Up @@ -167,26 +167,23 @@ def ints_to_pydatetime(

if value == NPY_NAT:
result[i] = <object>NaT
continue

if use_utc:
local_val = value
elif use_tzlocal:
local_val = tz_convert_utc_to_tzlocal(value, tz)
elif use_fixed:
local_val = value + delta
else:
if use_utc:
local_val = value
elif use_tzlocal:
local_val = tz_convert_utc_to_tzlocal(value, tz)
elif use_fixed:
local_val = value + delta
elif not use_pytz:
# i.e. dateutil
# no zone-name change for dateutil tzs - dst etc
# represented in single object.
local_val = value + deltas[pos[i]]
else:
# pytz
# find right representation of dst etc in pytz timezone
new_tz = tz._tzinfos[tz._transition_info[pos[i]]]
local_val = value + deltas[pos[i]]

dt64_to_dtstruct(local_val, &dts)
result[i] = func_create(value, dts, new_tz, freq, fold)
local_val = value + deltas[pos[i]]

if use_pytz:
# find right representation of dst etc in pytz timezone
new_tz = tz._tzinfos[tz._transition_info[pos[i]]]

dt64_to_dtstruct(local_val, &dts)
result[i] = func_create(value, dts, new_tz, freq, fold)

return result

Expand Down Expand Up @@ -226,7 +223,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
npy_datetimestruct dts
int reso = RESO_DAY, curr_reso
ndarray[int64_t] trans
int64_t[:] deltas
int64_t[::1] deltas
intp_t[:] pos
int64_t local_val, delta = NPY_NAT
bint use_utc = False, use_tzlocal = False, use_fixed = False
Expand Down Expand Up @@ -288,7 +285,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
Py_ssize_t i, n = len(stamps)
int64_t[:] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans
int64_t[:] deltas
int64_t[::1] deltas
str typ
Py_ssize_t[:] pos
int64_t local_val, delta = NPY_NAT
Expand Down Expand Up @@ -346,7 +343,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans
int64_t[:] deltas
int64_t[::1] deltas
intp_t[:] pos
int64_t local_val, delta = NPY_NAT
str typ
Expand Down Expand Up @@ -392,7 +389,7 @@ def dt64arr_to_periodarr(const int64_t[:] stamps, int freq, tzinfo tz):
Py_ssize_t i, n = len(stamps)
int64_t[:] result = np.empty(n, dtype=np.int64)
ndarray[int64_t] trans
int64_t[:] deltas
int64_t[::1] deltas
Py_ssize_t[:] pos
npy_datetimestruct dts
int64_t local_val, delta = NPY_NAT
Expand Down