Skip to content

REF: re-use Localizer for tz_convert_from_utc #46803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions asv_bench/benchmarks/tslibs/tz_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@

try:
old_sig = False
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc
from pandas._libs.tslibs import tz_convert_from_utc
except ImportError:
old_sig = True
from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc
try:
old_sig = False
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc
except ImportError:
old_sig = True
from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc


class TimeTZConvert:
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"get_resolution",
"Timestamp",
"tz_convert_from_utc_single",
"tz_convert_from_utc",
"to_offset",
"Tick",
"BaseOffset",
Expand Down Expand Up @@ -64,4 +65,5 @@
ints_to_pydatetime,
is_date_array_normalized,
normalize_i8_timestamps,
tz_convert_from_utc,
)
5 changes: 0 additions & 5 deletions pandas/_libs/tslibs/tzconversion.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ import numpy as np

from pandas._typing import npt

# Type stub for the array version: int64 values and a tzinfo in, int64 array out.
def tz_convert_from_utc(
    vals: npt.NDArray[np.int64],  # const int64_t[:]
    tz: tzinfo,
) -> npt.NDArray[np.int64]: ...

# py_tz_convert_from_utc_single exposed for testing
# (scalar stub: a single np.int64 value and a tzinfo in, np.int64 out)
def py_tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ...
def tz_localize_to_utc(
Expand Down
104 changes: 1 addition & 103 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -429,18 +429,7 @@ cdef int64_t localize_tzinfo_api(
int64_t utc_val, tzinfo tz, bint* fold=NULL
) except? -1:
"""
Parameters
----------
utc_val : int64_t
tz : tzinfo
fold : bint*
pointer to fold: whether datetime ends up in a fold or not
after adjustment

Returns
-------
delta : int64_t
Value to add when converting from utc.
See _tz_localize_using_tzinfo_api.__doc__
"""
return _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False, fold=fold)

Expand Down Expand Up @@ -514,97 +503,6 @@ cdef int64_t tz_convert_from_utc_single(
return utc_val + deltas[0]


def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
    """
    Translate UTC values (in i8) into the corresponding values for tz.

    Thin Python-visible wrapper: empty input is answered directly, anything
    else is delegated to ``_tz_convert_from_utc``.

    Parameters
    ----------
    vals : int64 ndarray
    tz : tzinfo

    Returns
    -------
    int64 ndarray of converted
    """
    cdef:
        const int64_t[:] local_vals

    if vals.shape[0] != 0:
        local_vals = _tz_convert_from_utc(vals, tz)
        return np.asarray(local_vals, dtype=np.int64)

    # Nothing to convert; hand back a fresh empty int64 array.
    return np.array([], dtype=np.int64)


@cython.boundscheck(False)
@cython.wraparound(False)
cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
    """
    Convert the given values (in i8) from UTC to tz.

    Parameters
    ----------
    stamps : int64 ndarray
    tz : tzinfo
        None is handled the same way as UTC.

    Returns
    -------
    converted : ndarray[int64_t]
        A copy of ``stamps`` when tz is UTC or None; otherwise each value
        with the applicable offset added. NPY_NAT entries stay NPY_NAT.
    """
    cdef:
        Py_ssize_t i, ntrans = -1, n = stamps.shape[0]
        ndarray[int64_t] trans
        int64_t[::1] deltas
        int64_t* tdata = NULL
        intp_t pos
        int64_t utc_val, local_val, delta = NPY_NAT
        bint use_tzlocal = False, use_fixed = False
        str typ

        int64_t[::1] result

    if is_utc(tz) or tz is None:
        # Much faster than going through the "standard" pattern below
        return stamps.copy()

    # UTC/None already returned above, so only three cases remain:
    # tzlocal/zoneinfo, a fixed offset, or a transition table lookup.
    if is_tzlocal(tz) or is_zoneinfo(tz):
        use_tzlocal = True
    else:
        trans, deltas, typ = get_dst_info(tz)
        ntrans = trans.shape[0]
        if typ not in ["pytz", "dateutil"]:
            # static/fixed; in this case we know that len(delta) == 1
            use_fixed = True
            delta = deltas[0]
        else:
            # Raw pointer into the transition array for bisect_right_i8 below
            tdata = <int64_t*>cnp.PyArray_DATA(trans)

    result = np.empty(n, dtype=np.int64)

    for i in range(n):
        utc_val = stamps[i]
        if utc_val == NPY_NAT:
            result[i] = NPY_NAT
            continue

        # The pattern used in vectorized.pyx checks for use_utc here,
        # but we handle that case above.
        if use_tzlocal:
            local_val = utc_val + _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False)
        elif use_fixed:
            local_val = utc_val + delta
        else:
            # Index of the last transition at or before utc_val
            pos = bisect_right_i8(tdata, utc_val, ntrans) - 1
            local_val = utc_val + deltas[pos]

        result[i] = local_val

    return result


# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
cdef int64_t _tz_localize_using_tzinfo_api(
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/vectorized.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,6 @@ def ints_to_pydatetime(
fold: bool = ...,
box: str = ...,
) -> npt.NDArray[np.object_]: ...
# Type stub: tz may be None as well as a tzinfo; input and output are int64 arrays.
def tz_convert_from_utc(
    stamps: npt.NDArray[np.int64], tz: tzinfo | None
) -> npt.NDArray[np.int64]: ...
83 changes: 58 additions & 25 deletions pandas/_libs/tslibs/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ cdef class Localizer:
Py_ssize_t ntrans
const int64_t[::1] deltas
int64_t delta
int64_t* tdata

@cython.initializedcheck(False)
@cython.boundscheck(False)
Expand All @@ -69,6 +70,7 @@ cdef class Localizer:
self.ntrans = -1 # placeholder
self.delta = -1 # placeholder
self.deltas = _deltas_placeholder
self.tdata = NULL

if is_utc(tz) or tz is None:
self.use_utc = True
Expand All @@ -91,6 +93,57 @@ cdef class Localizer:
if typ == "pytz":
self.use_pytz = True

self.tdata = <int64_t*>cnp.PyArray_DATA(self.trans)


@cython.boundscheck(False)
@cython.wraparound(False)
def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
    """
    Shift UTC values (in i8) to the corresponding values in tz.

    Parameters
    ----------
    stamps : ndarray[int64]
    tz : tzinfo
        None is handled the same way as UTC.

    Returns
    -------
    ndarray[int64]
        A copy of the input when tz is None/UTC or the input is empty;
        otherwise a new array in which NPY_NAT entries are preserved.
    """
    cdef:
        Localizer info = Localizer(tz)
        int64_t utc_val, local_val
        Py_ssize_t pos, i, n = stamps.shape[0]

        int64_t[::1] result

    if tz is None or is_utc(tz) or stamps.size == 0:
        # Much faster than going through the "standard" pattern below
        return stamps.base.copy()

    result = np.empty(n, dtype=np.int64)

    for i in range(n):
        utc_val = stamps[i]

        if utc_val != NPY_NAT:
            if info.use_utc:
                local_val = utc_val
            elif info.use_tzlocal:
                local_val = utc_val + localize_tzinfo_api(utc_val, tz)
            elif info.use_fixed:
                local_val = utc_val + info.delta
            else:
                # Index of the last transition at or before utc_val
                pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
                local_val = utc_val + info.deltas[pos]

            result[i] = local_val
        else:
            # Missing values pass through untouched
            result[i] = NPY_NAT

    return result.base


# -------------------------------------------------------------------------

Expand Down Expand Up @@ -135,7 +188,6 @@ def ints_to_pydatetime(
Localizer info = Localizer(tz)
int64_t utc_val, local_val
Py_ssize_t pos, i, n = stamps.shape[0]
int64_t* tdata = NULL

npy_datetimestruct dts
tzinfo new_tz
Expand All @@ -156,9 +208,6 @@ def ints_to_pydatetime(
"box must be one of 'datetime', 'date', 'time' or 'timestamp'"
)

if info.use_dst:
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)

for i in range(n):
utc_val = stamps[i]
new_tz = tz
Expand All @@ -174,7 +223,7 @@ def ints_to_pydatetime(
elif info.use_fixed:
local_val = utc_val + info.delta
else:
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
local_val = utc_val + info.deltas[pos]

if info.use_pytz:
Expand Down Expand Up @@ -222,14 +271,10 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
Localizer info = Localizer(tz)
int64_t utc_val, local_val
Py_ssize_t pos, i, n = stamps.shape[0]
int64_t* tdata = NULL

npy_datetimestruct dts
c_Resolution reso = c_Resolution.RESO_DAY, curr_reso

if info.use_dst:
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)

for i in range(n):
utc_val = stamps[i]
if utc_val == NPY_NAT:
Expand All @@ -242,7 +287,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
elif info.use_fixed:
local_val = utc_val + info.delta
else:
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
local_val = utc_val + info.deltas[pos]

dt64_to_dtstruct(local_val, &dts)
Expand Down Expand Up @@ -278,13 +323,9 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
Localizer info = Localizer(tz)
int64_t utc_val, local_val
Py_ssize_t pos, i, n = stamps.shape[0]
int64_t* tdata = NULL

int64_t[::1] result = np.empty(n, dtype=np.int64)

if info.use_dst:
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)

for i in range(n):
utc_val = stamps[i]
if utc_val == NPY_NAT:
Expand All @@ -298,7 +339,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
elif info.use_fixed:
local_val = utc_val + info.delta
else:
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
local_val = utc_val + info.deltas[pos]

result[i] = local_val - (local_val % DAY_NANOS)
Expand Down Expand Up @@ -327,10 +368,6 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
Localizer info = Localizer(tz)
int64_t utc_val, local_val
Py_ssize_t pos, i, n = stamps.shape[0]
int64_t* tdata = NULL

if info.use_dst:
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)

for i in range(n):
utc_val = stamps[i]
Expand All @@ -341,7 +378,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
elif info.use_fixed:
local_val = utc_val + info.delta
else:
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
local_val = utc_val + info.deltas[pos]

if local_val % DAY_NANOS != 0:
Expand All @@ -361,15 +398,11 @@ def dt64arr_to_periodarr(ndarray stamps, int freq, tzinfo tz):
Localizer info = Localizer(tz)
Py_ssize_t pos, i, n = stamps.size
int64_t utc_val, local_val, res_val
int64_t* tdata = NULL

npy_datetimestruct dts
ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps)

if info.use_dst:
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)

for i in range(n):
# Analogous to: utc_val = stamps[i]
utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
Expand All @@ -384,7 +417,7 @@ def dt64arr_to_periodarr(ndarray stamps, int freq, tzinfo tz):
elif info.use_fixed:
local_val = utc_val + info.delta
else:
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
local_val = utc_val + info.deltas[pos]

dt64_to_dtstruct(local_val, &dts)
Expand Down
Loading