Skip to content

Commit 50a8667

Browse files
authored
BUG: support ambiguous=infer with ZoneInfo (#49700)
* BUG: rendering dt64tz values with non-pytz
* GH ref
* py38 compat
* BUG: support ambiguous=infer with ZoneInfo
* add type declaration
* no-tzdata compat
1 parent 005486f commit 50a8667

File tree

6 files changed

+140
-39
lines changed

6 files changed

+140
-39
lines changed

pandas/_libs/tslibs/timezones.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ from cpython.datetime cimport (
66

77

88
cdef tzinfo utc_pytz
9+
cdef tzinfo utc_stdlib
910

1011
cpdef bint is_utc(tzinfo tz)
1112
cdef bint is_tzlocal(tzinfo tz)

pandas/_libs/tslibs/tzconversion.pyx

+101-29
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,15 @@ from pandas._libs.tslibs.np_datetime cimport (
3636
NPY_DATETIMEUNIT,
3737
npy_datetimestruct,
3838
pandas_datetime_to_datetimestruct,
39+
pydatetime_to_dt64,
3940
)
4041
from pandas._libs.tslibs.timezones cimport (
4142
get_dst_info,
4243
is_fixed_offset,
4344
is_tzlocal,
4445
is_utc,
4546
is_zoneinfo,
47+
utc_stdlib,
4648
)
4749

4850

@@ -154,7 +156,7 @@ cdef int64_t tz_localize_to_utc_single(
154156
# TODO: test with non-nano
155157
return val
156158

157-
elif is_tzlocal(tz) or is_zoneinfo(tz):
159+
elif is_tzlocal(tz):
158160
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, creso=creso)
159161

160162
elif is_fixed_offset(tz):
@@ -242,29 +244,6 @@ timedelta-like}
242244
if info.use_utc:
243245
return vals.copy()
244246

245-
result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
246-
247-
if info.use_tzlocal:
248-
for i in range(n):
249-
v = vals[i]
250-
if v == NPY_NAT:
251-
result[i] = NPY_NAT
252-
else:
253-
result[i] = v - _tz_localize_using_tzinfo_api(
254-
v, tz, to_utc=True, creso=creso
255-
)
256-
return result.base # to return underlying ndarray
257-
258-
elif info.use_fixed:
259-
delta = info.delta
260-
for i in range(n):
261-
v = vals[i]
262-
if v == NPY_NAT:
263-
result[i] = NPY_NAT
264-
else:
265-
result[i] = v - delta
266-
return result.base # to return underlying ndarray
267-
268247
# silence false-positive compiler warning
269248
ambiguous_array = np.empty(0, dtype=bool)
270249
if isinstance(ambiguous, str):
@@ -299,11 +278,39 @@ timedelta-like}
299278
"shift_backwards} or a timedelta object")
300279
raise ValueError(msg)
301280

281+
result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
282+
283+
if info.use_tzlocal and not is_zoneinfo(tz):
284+
for i in range(n):
285+
v = vals[i]
286+
if v == NPY_NAT:
287+
result[i] = NPY_NAT
288+
else:
289+
result[i] = v - _tz_localize_using_tzinfo_api(
290+
v, tz, to_utc=True, creso=creso
291+
)
292+
return result.base # to return underlying ndarray
293+
294+
elif info.use_fixed:
295+
delta = info.delta
296+
for i in range(n):
297+
v = vals[i]
298+
if v == NPY_NAT:
299+
result[i] = NPY_NAT
300+
else:
301+
result[i] = v - delta
302+
return result.base # to return underlying ndarray
303+
302304
# Determine whether each date lies left of the DST transition (store in
303305
# result_a) or right of the DST transition (store in result_b)
304-
result_a, result_b =_get_utc_bounds(
305-
vals, info.tdata, info.ntrans, info.deltas, creso=creso
306-
)
306+
if is_zoneinfo(tz):
307+
result_a, result_b =_get_utc_bounds_zoneinfo(
308+
vals, tz, creso=creso
309+
)
310+
else:
311+
result_a, result_b =_get_utc_bounds(
312+
vals, info.tdata, info.ntrans, info.deltas, creso=creso
313+
)
307314

308315
# silence false-positive compiler warning
309316
dst_hours = np.empty(0, dtype=np.int64)
@@ -391,8 +398,7 @@ timedelta-like}
391398
return result.base # .base to get underlying ndarray
392399

393400

394-
cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
395-
int64_t val, Py_ssize_t n):
401+
cdef inline Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n):
396402
# Caller is responsible for checking n > 0
397403
# This looks very similar to local_search_right in the ndarray.searchsorted
398404
# implementation.
@@ -483,6 +489,72 @@ cdef _get_utc_bounds(
483489
return result_a, result_b
484490

485491

492+
cdef _get_utc_bounds_zoneinfo(ndarray vals, tz, NPY_DATETIMEUNIT creso):
493+
"""
494+
For each point in 'vals', find the UTC times that it corresponds to
495+
with fold=0 and fold=1. In non-ambiguous cases, these will match.
496+
497+
Parameters
498+
----------
499+
vals : ndarray[int64_t]
500+
tz : ZoneInfo
501+
creso : NPY_DATETIMEUNIT
502+
503+
Returns
504+
-------
505+
ndarray[int64_t]
506+
ndarray[int64_t]
507+
"""
508+
cdef:
509+
Py_ssize_t i, n = vals.size
510+
npy_datetimestruct dts
511+
datetime dt, rt, left, right, aware, as_utc
512+
int64_t val, pps = periods_per_second(creso)
513+
ndarray result_a, result_b
514+
515+
result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
516+
result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
517+
518+
for i in range(n):
519+
val = vals[i]
520+
if val == NPY_NAT:
521+
result_a[i] = NPY_NAT
522+
result_b[i] = NPY_NAT
523+
continue
524+
525+
pandas_datetime_to_datetimestruct(val, creso, &dts)
526+
# casting to pydatetime drops nanoseconds etc, which we will
527+
# need to re-add later as 'extra'
528+
extra = (dts.ps // 1000) * (pps // 1_000_000_000)
529+
530+
dt = datetime_new(dts.year, dts.month, dts.day, dts.hour,
531+
dts.min, dts.sec, dts.us, None)
532+
533+
aware = dt.replace(tzinfo=tz)
534+
as_utc = aware.astimezone(utc_stdlib)
535+
rt = as_utc.astimezone(tz)
536+
if aware != rt:
537+
# AFAICT this means that 'aware' is non-existent
538+
# TODO: better way to check this?
539+
# mail.python.org/archives/list/datetime-sig@python.org/
540+
# thread/57Y3IQAASJOKHX4D27W463XTZIS2NR3M/
541+
result_a[i] = NPY_NAT
542+
else:
543+
left = as_utc.replace(tzinfo=None)
544+
result_a[i] = pydatetime_to_dt64(left, &dts, creso) + extra
545+
546+
aware = dt.replace(fold=1, tzinfo=tz)
547+
as_utc = aware.astimezone(utc_stdlib)
548+
rt = as_utc.astimezone(tz)
549+
if aware != rt:
550+
result_b[i] = NPY_NAT
551+
else:
552+
right = as_utc.replace(tzinfo=None)
553+
result_b[i] = pydatetime_to_dt64(right, &dts, creso) + extra
554+
555+
return result_a, result_b
556+
557+
486558
@cython.boundscheck(False)
487559
cdef ndarray[int64_t] _get_dst_hours(
488560
# vals, creso only needed here to potentially render an exception message

pandas/_libs/tslibs/vectorized.pyx

-7
Original file line numberDiff line numberDiff line change
@@ -106,13 +106,6 @@ def ints_to_pydatetime(
106106
stamps : array of i8
107107
tz : str, optional
108108
convert to this timezone
109-
fold : bint, default is 0
110-
Due to daylight saving time, one wall clock time can occur twice
111-
when shifting from summer to winter time; fold describes whether the
112-
datetime-like corresponds to the first (0) or the second time (1)
113-
the wall clock hits the ambiguous time
114-
115-
.. versionadded:: 1.1.0
116109
box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
117110
* If datetime, convert to datetime.datetime
118111
* If date, convert to datetime.date

pandas/tests/arrays/test_datetimes.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -715,10 +715,12 @@ def test_tz_localize_t2d(self):
715715
easts = ["US/Eastern", "dateutil/US/Eastern"]
716716
if ZoneInfo is not None:
717717
try:
718-
easts.append(ZoneInfo("US/Eastern"))
718+
tz = ZoneInfo("US/Eastern")
719719
except KeyError:
720-
# No tzdata
720+
# no tzdata
721721
pass
722+
else:
723+
easts.append(tz)
722724

723725
@pytest.mark.parametrize("tz", easts)
724726
def test_iter_zoneinfo_fold(self, tz):

pandas/tests/indexes/datetimes/test_timezones.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
import pytest
1919
import pytz
2020

21+
try:
22+
from zoneinfo import ZoneInfo
23+
except ImportError:
24+
ZoneInfo = None
25+
2126
from pandas._libs.tslibs import (
2227
conversion,
2328
timezones,
@@ -355,7 +360,17 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self):
355360
expected = dti.tz_convert("US/Eastern")
356361
tm.assert_index_equal(result, expected)
357362

358-
@pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
363+
easts = [pytz.timezone("US/Eastern"), gettz("US/Eastern")]
364+
if ZoneInfo is not None:
365+
try:
366+
tz = ZoneInfo("US/Eastern")
367+
except KeyError:
368+
# no tzdata
369+
pass
370+
else:
371+
easts.append(tz)
372+
373+
@pytest.mark.parametrize("tz", easts)
359374
def test_dti_tz_localize_ambiguous_infer(self, tz):
360375
# November 6, 2011, fall back, repeat 2 AM hour
361376
# With no repeated hours, we cannot infer the transition

pandas/tests/scalar/timestamp/test_timezones.py

+18
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929
Timestamp,
3030
)
3131

32+
try:
33+
from zoneinfo import ZoneInfo
34+
except ImportError:
35+
ZoneInfo = None
36+
3237

3338
class TestTimestampTZOperations:
3439
# --------------------------------------------------------------
@@ -70,6 +75,19 @@ def test_tz_localize_ambiguous_bool(self, unit):
7075
with pytest.raises(pytz.AmbiguousTimeError, match=msg):
7176
ts.tz_localize("US/Central")
7277

78+
with pytest.raises(pytz.AmbiguousTimeError, match=msg):
79+
ts.tz_localize("dateutil/US/Central")
80+
81+
if ZoneInfo is not None:
82+
try:
83+
tz = ZoneInfo("US/Central")
84+
except KeyError:
85+
# no tzdata
86+
pass
87+
else:
88+
with pytest.raises(pytz.AmbiguousTimeError, match=msg):
89+
ts.tz_localize(tz)
90+
7391
result = ts.tz_localize("US/Central", ambiguous=True)
7492
assert result == expected0
7593
assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value

0 commit comments

Comments
 (0)