Skip to content

Commit 69954ff

Browse files
authored
REF: re-use Localizer for tz_convert_from_utc (#46803)
1 parent 1213a17 commit 69954ff

File tree

10 files changed

+88
-148
lines changed

10 files changed

+88
-148
lines changed

asv_bench/benchmarks/tslibs/tz_convert.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,14 @@
1111

1212
try:
1313
old_sig = False
14-
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc
14+
from pandas._libs.tslibs import tz_convert_from_utc
1515
except ImportError:
16-
old_sig = True
17-
from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc
16+
try:
17+
old_sig = False
18+
from pandas._libs.tslibs.tzconversion import tz_convert_from_utc
19+
except ImportError:
20+
old_sig = True
21+
from pandas._libs.tslibs.tzconversion import tz_convert as tz_convert_from_utc
1822

1923

2024
class TimeTZConvert:

pandas/_libs/tslibs/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"get_resolution",
2121
"Timestamp",
2222
"tz_convert_from_utc_single",
23+
"tz_convert_from_utc",
2324
"to_offset",
2425
"Tick",
2526
"BaseOffset",
@@ -64,4 +65,5 @@
6465
ints_to_pydatetime,
6566
is_date_array_normalized,
6667
normalize_i8_timestamps,
68+
tz_convert_from_utc,
6769
)

pandas/_libs/tslibs/tzconversion.pyi

-5
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,6 @@ import numpy as np
88

99
from pandas._typing import npt
1010

11-
def tz_convert_from_utc(
12-
vals: npt.NDArray[np.int64], # const int64_t[:]
13-
tz: tzinfo,
14-
) -> npt.NDArray[np.int64]: ...
15-
1611
# py_tz_convert_from_utc_single exposed for testing
1712
def py_tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ...
1813
def tz_localize_to_utc(

pandas/_libs/tslibs/tzconversion.pyx

+1-103
Original file line numberDiff line numberDiff line change
@@ -430,18 +430,7 @@ cdef int64_t localize_tzinfo_api(
430430
int64_t utc_val, tzinfo tz, bint* fold=NULL
431431
) except? -1:
432432
"""
433-
Parameters
434-
----------
435-
utc_val : int64_t
436-
tz : tzinfo
437-
fold : bint*
438-
pointer to fold: whether datetime ends up in a fold or not
439-
after adjustment
440-
441-
Returns
442-
-------
443-
delta : int64_t
444-
Value to add when converting from utc.
433+
See _tz_localize_using_tzinfo_api.__doc__
445434
"""
446435
return _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False, fold=fold)
447436

@@ -516,97 +505,6 @@ cdef int64_t tz_convert_from_utc_single(
516505
return utc_val + deltas[0]
517506

518507

519-
def tz_convert_from_utc(const int64_t[:] vals, tzinfo tz):
520-
"""
521-
Convert the values (in i8) from UTC to tz
522-
523-
Parameters
524-
----------
525-
vals : int64 ndarray
526-
tz : tzinfo
527-
528-
Returns
529-
-------
530-
int64 ndarray of converted
531-
"""
532-
cdef:
533-
const int64_t[:] converted
534-
535-
if vals.shape[0] == 0:
536-
return np.array([], dtype=np.int64)
537-
538-
converted = _tz_convert_from_utc(vals, tz)
539-
return np.asarray(converted, dtype=np.int64)
540-
541-
542-
@cython.boundscheck(False)
543-
@cython.wraparound(False)
544-
cdef const int64_t[:] _tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
545-
"""
546-
Convert the given values (in i8) either to UTC or from UTC.
547-
548-
Parameters
549-
----------
550-
stamps : int64 ndarray
551-
tz : tzinfo
552-
553-
Returns
554-
-------
555-
converted : ndarray[int64_t]
556-
"""
557-
cdef:
558-
Py_ssize_t i, ntrans = -1, n = stamps.shape[0]
559-
ndarray[int64_t] trans
560-
int64_t[::1] deltas
561-
int64_t* tdata = NULL
562-
intp_t pos
563-
int64_t utc_val, local_val, delta = NPY_NAT
564-
bint use_utc = False, use_tzlocal = False, use_fixed = False
565-
str typ
566-
567-
int64_t[::1] result
568-
569-
if is_utc(tz) or tz is None:
570-
# Much faster than going through the "standard" pattern below
571-
return stamps.copy()
572-
573-
if is_utc(tz) or tz is None:
574-
use_utc = True
575-
elif is_tzlocal(tz) or is_zoneinfo(tz):
576-
use_tzlocal = True
577-
else:
578-
trans, deltas, typ = get_dst_info(tz)
579-
ntrans = trans.shape[0]
580-
if typ not in ["pytz", "dateutil"]:
581-
# static/fixed; in this case we know that len(delta) == 1
582-
use_fixed = True
583-
delta = deltas[0]
584-
else:
585-
tdata = <int64_t*>cnp.PyArray_DATA(trans)
586-
587-
result = np.empty(n, dtype=np.int64)
588-
589-
for i in range(n):
590-
utc_val = stamps[i]
591-
if utc_val == NPY_NAT:
592-
result[i] = NPY_NAT
593-
continue
594-
595-
# The pattern used in vectorized.pyx checks for use_utc here,
596-
# but we handle that case above.
597-
if use_tzlocal:
598-
local_val = utc_val + _tz_localize_using_tzinfo_api(utc_val, tz, to_utc=False)
599-
elif use_fixed:
600-
local_val = utc_val + delta
601-
else:
602-
pos = bisect_right_i8(tdata, utc_val, ntrans) - 1
603-
local_val = utc_val + deltas[pos]
604-
605-
result[i] = local_val
606-
607-
return result
608-
609-
610508
# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
611509
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
612510
cdef int64_t _tz_localize_using_tzinfo_api(

pandas/_libs/tslibs/vectorized.pyi

+3
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,6 @@ def ints_to_pydatetime(
3434
fold: bool = ...,
3535
box: str = ...,
3636
) -> npt.NDArray[np.object_]: ...
37+
def tz_convert_from_utc(
38+
stamps: npt.NDArray[np.int64], tz: tzinfo | None
39+
) -> npt.NDArray[np.int64]: ...

pandas/_libs/tslibs/vectorized.pyx

+58-25
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ cdef class Localizer:
5858
Py_ssize_t ntrans
5959
const int64_t[::1] deltas
6060
int64_t delta
61+
int64_t* tdata
6162

6263
@cython.initializedcheck(False)
6364
@cython.boundscheck(False)
@@ -68,6 +69,7 @@ cdef class Localizer:
6869
self.ntrans = -1 # placeholder
6970
self.delta = -1 # placeholder
7071
self.deltas = _deltas_placeholder
72+
self.tdata = NULL
7173

7274
if is_utc(tz) or tz is None:
7375
self.use_utc = True
@@ -90,6 +92,57 @@ cdef class Localizer:
9092
if typ == "pytz":
9193
self.use_pytz = True
9294

95+
self.tdata = <int64_t*>cnp.PyArray_DATA(self.trans)
96+
97+
98+
@cython.boundscheck(False)
@cython.wraparound(False)
def tz_convert_from_utc(const int64_t[:] stamps, tzinfo tz):
    """
    Convert the values (in i8) from UTC to tz.

    Parameters
    ----------
    stamps : ndarray[int64]
    tz : tzinfo or None
        None is handled the same way as UTC: a copy of the input is returned.

    Returns
    -------
    ndarray[int64]
        A new array; NPY_NAT entries are passed through unchanged.
    """
    cdef:
        # Declared here, constructed only after the fast path below.
        Localizer info
        int64_t utc_val, local_val
        Py_ssize_t pos, i, n = stamps.shape[0]

        int64_t[::1] result

    if tz is None or is_utc(tz) or stamps.size == 0:
        # Much faster than going through the "standard" pattern below;
        #  return before building the Localizer so this path skips that work.
        return stamps.base.copy()

    info = Localizer(tz)
    result = np.empty(n, dtype=np.int64)

    for i in range(n):
        utc_val = stamps[i]
        if utc_val == NPY_NAT:
            result[i] = NPY_NAT
            continue

        if info.use_utc:
            # Not reached in this function (UTC/None returned early above);
            #  kept so the branch ladder matches the other Localizer-based
            #  loops in this module.
            local_val = utc_val
        elif info.use_tzlocal:
            local_val = utc_val + localize_tzinfo_api(utc_val, tz)
        elif info.use_fixed:
            local_val = utc_val + info.delta
        else:
            # Find the last transition at or before utc_val and apply
            #  the offset associated with it.
            pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
            local_val = utc_val + info.deltas[pos]

        result[i] = local_val

    return result.base
145+
93146

94147
# -------------------------------------------------------------------------
95148

@@ -134,7 +187,6 @@ def ints_to_pydatetime(
134187
Localizer info = Localizer(tz)
135188
int64_t utc_val, local_val
136189
Py_ssize_t pos, i, n = stamps.shape[0]
137-
int64_t* tdata = NULL
138190

139191
npy_datetimestruct dts
140192
tzinfo new_tz
@@ -155,9 +207,6 @@ def ints_to_pydatetime(
155207
"box must be one of 'datetime', 'date', 'time' or 'timestamp'"
156208
)
157209

158-
if info.use_dst:
159-
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
160-
161210
for i in range(n):
162211
utc_val = stamps[i]
163212
new_tz = tz
@@ -173,7 +222,7 @@ def ints_to_pydatetime(
173222
elif info.use_fixed:
174223
local_val = utc_val + info.delta
175224
else:
176-
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
225+
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
177226
local_val = utc_val + info.deltas[pos]
178227

179228
if info.use_pytz:
@@ -221,14 +270,10 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
221270
Localizer info = Localizer(tz)
222271
int64_t utc_val, local_val
223272
Py_ssize_t pos, i, n = stamps.shape[0]
224-
int64_t* tdata = NULL
225273

226274
npy_datetimestruct dts
227275
c_Resolution reso = c_Resolution.RESO_DAY, curr_reso
228276

229-
if info.use_dst:
230-
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
231-
232277
for i in range(n):
233278
utc_val = stamps[i]
234279
if utc_val == NPY_NAT:
@@ -241,7 +286,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
241286
elif info.use_fixed:
242287
local_val = utc_val + info.delta
243288
else:
244-
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
289+
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
245290
local_val = utc_val + info.deltas[pos]
246291

247292
dt64_to_dtstruct(local_val, &dts)
@@ -277,13 +322,9 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
277322
Localizer info = Localizer(tz)
278323
int64_t utc_val, local_val
279324
Py_ssize_t pos, i, n = stamps.shape[0]
280-
int64_t* tdata = NULL
281325

282326
int64_t[::1] result = np.empty(n, dtype=np.int64)
283327

284-
if info.use_dst:
285-
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
286-
287328
for i in range(n):
288329
utc_val = stamps[i]
289330
if utc_val == NPY_NAT:
@@ -297,7 +338,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
297338
elif info.use_fixed:
298339
local_val = utc_val + info.delta
299340
else:
300-
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
341+
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
301342
local_val = utc_val + info.deltas[pos]
302343

303344
result[i] = local_val - (local_val % DAY_NANOS)
@@ -326,10 +367,6 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
326367
Localizer info = Localizer(tz)
327368
int64_t utc_val, local_val
328369
Py_ssize_t pos, i, n = stamps.shape[0]
329-
int64_t* tdata = NULL
330-
331-
if info.use_dst:
332-
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
333370

334371
for i in range(n):
335372
utc_val = stamps[i]
@@ -340,7 +377,7 @@ def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
340377
elif info.use_fixed:
341378
local_val = utc_val + info.delta
342379
else:
343-
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
380+
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
344381
local_val = utc_val + info.deltas[pos]
345382

346383
if local_val % DAY_NANOS != 0:
@@ -360,15 +397,11 @@ def dt64arr_to_periodarr(ndarray stamps, int freq, tzinfo tz):
360397
Localizer info = Localizer(tz)
361398
Py_ssize_t pos, i, n = stamps.size
362399
int64_t utc_val, local_val, res_val
363-
int64_t* tdata = NULL
364400

365401
npy_datetimestruct dts
366402
ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
367403
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps)
368404

369-
if info.use_dst:
370-
tdata = <int64_t*>cnp.PyArray_DATA(info.trans)
371-
372405
for i in range(n):
373406
# Analogous to: utc_val = stamps[i]
374407
utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
@@ -383,7 +416,7 @@ def dt64arr_to_periodarr(ndarray stamps, int freq, tzinfo tz):
383416
elif info.use_fixed:
384417
local_val = utc_val + info.delta
385418
else:
386-
pos = bisect_right_i8(tdata, utc_val, info.ntrans) - 1
419+
pos = bisect_right_i8(info.tdata, utc_val, info.ntrans) - 1
387420
local_val = utc_val + info.deltas[pos]
388421

389422
dt64_to_dtstruct(local_val, &dts)

0 commit comments

Comments
 (0)