Skip to content

REF: de-duplicate DST tzconversion code #35077

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 46 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
4b8c4fb
REF: implement TZConvertInfo
jbrockmendel Jun 30, 2020
d8bffdd
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jun 30, 2020
fc1ad75
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jun 30, 2020
998341e
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
8114413
setup_cache->setup
jbrockmendel Jul 1, 2020
907f9c4
revert
jbrockmendel Jul 1, 2020
addf931
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
b2154d4
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
9bcccc0
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 1, 2020
bb7e60d
ensure initialized
jbrockmendel Jul 1, 2020
407f266
ensure initialized
jbrockmendel Jul 1, 2020
5adca21
debuggina ssertions
jbrockmendel Jul 1, 2020
69bfb80
debuggina ssertions
jbrockmendel Jul 1, 2020
13244c8
debuggina ssertions
jbrockmendel Jul 1, 2020
c0f8b34
debuggina ssertions
jbrockmendel Jul 1, 2020
8620905
debuggina ssertions
jbrockmendel Jul 1, 2020
3605695
debuggina ssertions
jbrockmendel Jul 1, 2020
a0eb787
debuggina ssertions
jbrockmendel Jul 1, 2020
c8fcc19
debuggina ssertions
jbrockmendel Jul 1, 2020
db71af5
debuggina ssertions
jbrockmendel Jul 1, 2020
e47e490
debuggina ssertions
jbrockmendel Jul 1, 2020
7f8c717
debuggina ssertions
jbrockmendel Jul 1, 2020
e21cd6a
debuggina ssertions
jbrockmendel Jul 1, 2020
9a47096
debuggina ssertions
jbrockmendel Jul 1, 2020
d6dce1a
debuggina ssertions
jbrockmendel Jul 1, 2020
0dbd8ac
debuggina ssertions
jbrockmendel Jul 1, 2020
f9514b4
debuggina ssertions
jbrockmendel Jul 2, 2020
e198dbd
debuggina ssertions
jbrockmendel Jul 2, 2020
681f5b8
debuggina ssertions
jbrockmendel Jul 2, 2020
e93b961
debuggina ssertions
jbrockmendel Jul 2, 2020
52af5e1
debuggina ssertions
jbrockmendel Jul 2, 2020
26d0d3a
debuggina ssertions
jbrockmendel Jul 2, 2020
3b220fb
debuggina ssertions
jbrockmendel Jul 2, 2020
3c1bf60
debuggina ssertions
jbrockmendel Jul 2, 2020
a70ce3f
debuggina ssertions
jbrockmendel Jul 2, 2020
2478ec3
debuggina ssertions
jbrockmendel Jul 2, 2020
21cbfc2
debuggina ssertions
jbrockmendel Jul 2, 2020
2aa256a
debuggina ssertions
jbrockmendel Jul 2, 2020
7669dc2
debuggina ssertions
jbrockmendel Jul 2, 2020
2bfb9df
debuggina ssertions
jbrockmendel Jul 2, 2020
3dd2957
CLN
jbrockmendel Jul 2, 2020
b31c40e
debugging assertions
jbrockmendel Jul 2, 2020
8b2e9a3
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 7, 2020
e5b73c7
Implement TZ
jbrockmendel Jul 7, 2020
184e188
implement as cdef class
jbrockmendel Jul 7, 2020
b974ec7
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Jul 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 15 additions & 49 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
from cpython.datetime cimport tzinfo

import numpy as np
from numpy cimport ndarray, int64_t, int32_t
from numpy cimport ndarray, int64_t, int32_t, intp_t

from pandas._libs.tslibs.util cimport get_nat

from pandas._libs.tslibs.dtypes import Resolution
from pandas._libs.tslibs.np_datetime cimport (
npy_datetimestruct, dt64_to_dtstruct)
from pandas._libs.tslibs.timezones cimport (
is_utc, is_tzlocal, get_dst_info)
from pandas._libs.tslibs.ccalendar cimport get_days_in_month
from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal
from pandas._libs.tslibs.tzconversion cimport Localizer

# ----------------------------------------------------------------------
# Constants
Expand Down Expand Up @@ -39,51 +37,19 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None):
Py_ssize_t i, n = len(stamps)
npy_datetimestruct dts
int reso = RESO_DAY, curr_reso
ndarray[int64_t] trans
int64_t[:] deltas
Py_ssize_t[:] pos
int64_t local_val, delta

if is_utc(tz) or tz is None:
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i], &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
elif is_tzlocal(tz):
for i in range(n):
if stamps[i] == NPY_NAT:
continue
local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
dt64_to_dtstruct(local_val, &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(delta) == 1
delta = deltas[0]
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i] + delta, &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
else:
pos = trans.searchsorted(stamps, side='right') - 1
for i in range(n):
if stamps[i] == NPY_NAT:
continue
dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso
int64_t local_val
Localizer localizer = Localizer(tz, stamps)

for i in range(n):
if stamps[i] == NPY_NAT:
continue

local_val = localizer.get_local_timestamp(stamps[i], i)

dt64_to_dtstruct(local_val, &dts)
curr_reso = _reso_stamp(&dts)
if curr_reso < reso:
reso = curr_reso

return Resolution(reso)

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from cpython.datetime cimport datetime, timedelta, tzinfo

from numpy cimport int64_t, intp_t, ndarray

cdef tzinfo utc_pytz

cpdef bint is_utc(tzinfo tz)
Expand Down
7 changes: 3 additions & 4 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ UTC = pytz.utc

import numpy as np
cimport numpy as cnp
from numpy cimport int64_t
from numpy cimport int64_t, intp_t, ndarray
cnp.import_array()

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -192,10 +192,10 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
return new_trans


cdef int64_t[:] unbox_utcoffsets(object transinfo):
cdef ndarray[int64_t, ndim=1] unbox_utcoffsets(object transinfo):
cdef:
Py_ssize_t i, sz
int64_t[:] arr
ndarray[int64_t, ndim=1] arr

sz = len(transinfo)
arr = np.empty(sz, dtype='i8')
Expand All @@ -209,7 +209,6 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo):
# ----------------------------------------------------------------------
# Daylight Savings


cdef object get_dst_info(tzinfo tz):
"""
Returns
Expand Down
15 changes: 14 additions & 1 deletion pandas/_libs/tslibs/tzconversion.pxd
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
from cpython.datetime cimport tzinfo
from numpy cimport int64_t
from numpy cimport int64_t, intp_t, ndarray


cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=*)
cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2)
cdef int64_t tz_localize_to_utc_single(
int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*
) except? -1


cdef class Localizer:
cdef:
bint use_utc, use_tzlocal, use_fixed, use_pytz
int noffsets
int64_t* utcoffsets
intp_t* positions
ndarray positions_arr # needed to avoid segfault
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we even need both positions_arr and positions or can we just use the former?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could just use the former, but I think we get a perf boost from indexing on the latter.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you can see a difference then sure, but if not, it would definitely be cleaner to just stick with the ndarray.

int64_t delta
tzinfo tz

cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i)
55 changes: 55 additions & 0 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,61 @@ from pandas._libs.tslibs.timezones cimport (
)


cdef class Localizer:
    """
    Helper that shifts UTC int64 timestamps by the UTC offset of ``tz``.

    The constructor classifies ``tz`` into one of four cases (UTC/None,
    tzlocal, fixed offset, pytz/dateutil) and, for the pytz/dateutil case,
    precomputes for every element of ``values`` the index of the offset
    that applies to it.  ``get_local_timestamp`` then only has to branch on
    the case flags.

    Parameters
    ----------
    tz : tzinfo or None
        Target timezone.  ``None`` is treated the same as UTC.
    values : const int64_t[:]
        The UTC timestamps that will later be passed, element by element
        and with their index, to ``get_local_timestamp``.  Only read in the
        pytz/dateutil case.
    """
    # Attribute declarations for this class live in tzconversion.pxd.

    def __cinit__(self, tzinfo tz, const int64_t[:] values):
        # ``values`` is declared const so that read-only buffers are
        # accepted; it is only ever read (passed to searchsorted) here.
        cdef:
            ndarray[intp_t, ndim=1] pos
            ndarray[int64_t, ndim=1] deltas

        # Start from a fully "unset" state so that every attribute has a
        # defined value regardless of which branch below is taken.
        self.use_utc = self.use_tzlocal = self.use_fixed = self.use_pytz = False
        self.delta = NPY_NAT  # placeholder
        self.utcoffsets = NULL
        self.positions = NULL
        self.noffsets = 0
        self.tz = tz

        if tz is None or is_utc(tz):
            self.use_utc = True
        elif is_tzlocal(tz):
            self.use_tzlocal = True
        else:
            trans, deltas, typ = get_dst_info(tz)
            self.noffsets = len(deltas)
            if typ not in ["pytz", "dateutil"]:
                # Fixed Offset: a single offset applies to every value.
                self.use_fixed = True
                self.delta = deltas[0]
            else:
                # NOTE(review): ``utcoffsets`` borrows the buffer of
                # ``deltas`` without this object holding a reference to the
                # array; this presumably relies on get_dst_info keeping the
                # array alive -- confirm.
                self.utcoffsets = <int64_t*>deltas.data

                # For each value, index of the last transition that is
                # <= that value.
                pos = trans.searchsorted(values, side="right") - 1

                # Keep a reference to the array so that the raw pointer
                # stored in ``positions`` stays valid for the lifetime of
                # this object.
                self.positions_arr = pos
                self.positions = <intp_t*>pos.data
                self.use_pytz = typ == "pytz"

    cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i):
        """
        Return ``utc_value`` shifted by the UTC offset of ``self.tz``.

        Parameters
        ----------
        utc_value : int64_t
            UTC timestamp to convert.
        i : Py_ssize_t
            Index of ``utc_value`` in the ``values`` array that was passed
            to the constructor.  Only used in the pytz/dateutil case.

        Returns
        -------
        int64_t

        Notes
        -----
        NOTE(review): in the pytz/dateutil case the lookup goes through raw
        pointers with no bounds check.  A value that sorts before the first
        transition gets position -1, which would read before the start of
        ``utcoffsets`` -- confirm that callers skip such values (e.g.
        NPY_NAT) before calling this method.
        """
        cdef:
            int64_t local_val

        if self.use_utc:
            local_val = utc_value
        elif self.use_tzlocal:
            local_val = tz_convert_utc_to_tzlocal(utc_value, self.tz)
        elif self.use_fixed:
            local_val = utc_value + self.delta
        else:
            local_val = utc_value + self.utcoffsets[self.positions[i]]
        return local_val


cdef int64_t tz_localize_to_utc_single(
int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None,
) except? -1:
Expand Down