Skip to content

BUG: support ambiguous=infer with ZoneInfo #49700

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ from cpython.datetime cimport (


cdef tzinfo utc_pytz
cdef tzinfo utc_stdlib

cpdef bint is_utc(tzinfo tz)
cdef bint is_tzlocal(tzinfo tz)
Expand Down
130 changes: 101 additions & 29 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
pydatetime_to_dt64,
)
from pandas._libs.tslibs.timezones cimport (
get_dst_info,
is_fixed_offset,
is_tzlocal,
is_utc,
is_zoneinfo,
utc_stdlib,
)


Expand Down Expand Up @@ -154,7 +156,7 @@ cdef int64_t tz_localize_to_utc_single(
# TODO: test with non-nano
return val

elif is_tzlocal(tz) or is_zoneinfo(tz):
elif is_tzlocal(tz):
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, creso=creso)

elif is_fixed_offset(tz):
Expand Down Expand Up @@ -242,29 +244,6 @@ timedelta-like}
if info.use_utc:
return vals.copy()

result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)

if info.use_tzlocal:
for i in range(n):
v = vals[i]
if v == NPY_NAT:
result[i] = NPY_NAT
else:
result[i] = v - _tz_localize_using_tzinfo_api(
v, tz, to_utc=True, creso=creso
)
return result.base # to return underlying ndarray

elif info.use_fixed:
delta = info.delta
for i in range(n):
v = vals[i]
if v == NPY_NAT:
result[i] = NPY_NAT
else:
result[i] = v - delta
return result.base # to return underlying ndarray

# silence false-positive compiler warning
ambiguous_array = np.empty(0, dtype=bool)
if isinstance(ambiguous, str):
Expand Down Expand Up @@ -299,11 +278,39 @@ timedelta-like}
"shift_backwards} or a timedelta object")
raise ValueError(msg)

result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)

if info.use_tzlocal and not is_zoneinfo(tz):
for i in range(n):
v = vals[i]
if v == NPY_NAT:
result[i] = NPY_NAT
else:
result[i] = v - _tz_localize_using_tzinfo_api(
v, tz, to_utc=True, creso=creso
)
return result.base # to return underlying ndarray

elif info.use_fixed:
delta = info.delta
for i in range(n):
v = vals[i]
if v == NPY_NAT:
result[i] = NPY_NAT
else:
result[i] = v - delta
return result.base # to return underlying ndarray

# Determine whether each date lies left of the DST transition (store in
# result_a) or right of the DST transition (store in result_b)
result_a, result_b =_get_utc_bounds(
vals, info.tdata, info.ntrans, info.deltas, creso=creso
)
if is_zoneinfo(tz):
result_a, result_b =_get_utc_bounds_zoneinfo(
vals, tz, creso=creso
)
else:
result_a, result_b =_get_utc_bounds(
vals, info.tdata, info.ntrans, info.deltas, creso=creso
)

# silence false-positive compiler warning
dst_hours = np.empty(0, dtype=np.int64)
Expand Down Expand Up @@ -391,8 +398,7 @@ timedelta-like}
return result.base # .base to get underlying ndarray


cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
int64_t val, Py_ssize_t n):
cdef inline Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n):
# Caller is responsible for checking n > 0
# This looks very similar to local_search_right in the ndarray.searchsorted
# implementation.
Expand Down Expand Up @@ -483,6 +489,72 @@ cdef _get_utc_bounds(
return result_a, result_b


cdef _get_utc_bounds_zoneinfo(ndarray vals, tz, NPY_DATETIMEUNIT creso):
    """
    For each point in 'vals', find the UTC time that it corresponds to when
    interpreted as a wall time in 'tz', once with fold=0 and once with fold=1.
    In non-ambiguous cases, these will match.

    Parameters
    ----------
    vals : ndarray[int64_t]
    tz : ZoneInfo
    creso : NPY_DATETIMEUNIT

    Returns
    -------
    ndarray[int64_t]
        UTC values computed with fold=0. Entries are NPY_NAT where the input
        is NPY_NAT or where the wall time does not round-trip through UTC.
    ndarray[int64_t]
        UTC values computed with fold=1, with NPY_NAT set under the same
        conditions.
    """
    cdef:
        Py_ssize_t i, n = vals.size
        npy_datetimestruct dts
        datetime dt, rt, left, right, aware, as_utc
        int64_t val, pps = periods_per_second(creso)
        ndarray result_a, result_b

    result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
    result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)

    for i in range(n):
        val = vals[i]
        if val == NPY_NAT:
            # NaT propagates to both bounds
            result_a[i] = NPY_NAT
            result_b[i] = NPY_NAT
            continue

        pandas_datetime_to_datetimestruct(val, creso, &dts)
        # casting to pydatetime drops nanoseconds etc, which we will
        #  need to re-add later as 'extra'.
        # (pps // 1_000_000_000) is 0 whenever pps < 1_000_000_000, so
        #  'extra' is 0 in that case.
        # NOTE(review): presumably dts.ps holds picoseconds, making
        #  dts.ps // 1000 the sub-microsecond nanoseconds -- confirm.
        extra = (dts.ps // 1000) * (pps // 1_000_000_000)

        # Naive wall time at microsecond precision (tzinfo=None)
        dt = datetime_new(dts.year, dts.month, dts.day, dts.hour,
                          dts.min, dts.sec, dts.us, None)

        # Round-trip with the default fold: wall time -> UTC -> wall time
        aware = dt.replace(tzinfo=tz)
        as_utc = aware.astimezone(utc_stdlib)
        rt = as_utc.astimezone(tz)
        if aware != rt:
            # AFAICT this means that 'aware' is non-existent
            # TODO: better way to check this?
            #  mail.python.org/archives/list/datetime-sig@python.org/
            #  thread/57Y3IQAASJOKHX4D27W463XTZIS2NR3M/
            result_a[i] = NPY_NAT
        else:
            # Drop the tzinfo so the naive UTC time is what gets converted
            # NOTE(review): &dts is handed to pydatetime_to_dt64 here;
            #  'extra' was already computed from dts above.
            left = as_utc.replace(tzinfo=None)
            result_a[i] = pydatetime_to_dt64(left, &dts, creso) + extra

        # Same round-trip check, this time with fold=1
        aware = dt.replace(fold=1, tzinfo=tz)
        as_utc = aware.astimezone(utc_stdlib)
        rt = as_utc.astimezone(tz)
        if aware != rt:
            result_b[i] = NPY_NAT
        else:
            right = as_utc.replace(tzinfo=None)
            result_b[i] = pydatetime_to_dt64(right, &dts, creso) + extra

    return result_a, result_b


@cython.boundscheck(False)
cdef ndarray[int64_t] _get_dst_hours(
# vals, creso only needed here to potentially render an exception message
Expand Down
7 changes: 0 additions & 7 deletions pandas/_libs/tslibs/vectorized.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,6 @@ def ints_to_pydatetime(
stamps : array of i8
tz : str, optional
convert to this timezone
fold : bint, default is 0
Due to daylight saving time, one wall clock time can occur twice
when shifting from summer to winter time; fold describes whether the
datetime-like corresponds to the first (0) or the second time (1)
the wall clock hits the ambiguous time

.. versionadded:: 1.1.0
box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
* If datetime, convert to datetime.datetime
* If date, convert to datetime.date
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,10 +715,12 @@ def test_tz_localize_t2d(self):
easts = ["US/Eastern", "dateutil/US/Eastern"]
if ZoneInfo is not None:
try:
easts.append(ZoneInfo("US/Eastern"))
tz = ZoneInfo("US/Eastern")
except KeyError:
# No tzdata
# no tzdata
pass
else:
easts.append(tz)

@pytest.mark.parametrize("tz", easts)
def test_iter_zoneinfo_fold(self, tz):
Expand Down
17 changes: 16 additions & 1 deletion pandas/tests/indexes/datetimes/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
import pytest
import pytz

try:
from zoneinfo import ZoneInfo
except ImportError:
ZoneInfo = None

from pandas._libs.tslibs import (
conversion,
timezones,
Expand Down Expand Up @@ -355,7 +360,17 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self):
expected = dti.tz_convert("US/Eastern")
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
easts = [pytz.timezone("US/Eastern"), gettz("US/Eastern")]
if ZoneInfo is not None:
try:
tz = ZoneInfo("US/Eastern")
except KeyError:
# no tzdata
pass
else:
easts.append(tz)

@pytest.mark.parametrize("tz", easts)
def test_dti_tz_localize_ambiguous_infer(self, tz):
# November 6, 2011, fall back, repeat 2 AM hour
# With no repeated hours, we cannot infer the transition
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/scalar/timestamp/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
Timestamp,
)

try:
from zoneinfo import ZoneInfo
except ImportError:
ZoneInfo = None


class TestTimestampTZOperations:
# --------------------------------------------------------------
Expand Down Expand Up @@ -70,6 +75,19 @@ def test_tz_localize_ambiguous_bool(self, unit):
with pytest.raises(pytz.AmbiguousTimeError, match=msg):
ts.tz_localize("US/Central")

with pytest.raises(pytz.AmbiguousTimeError, match=msg):
ts.tz_localize("dateutil/US/Central")

if ZoneInfo is not None:
try:
tz = ZoneInfo("US/Central")
except KeyError:
# no tzdata
pass
else:
with pytest.raises(pytz.AmbiguousTimeError, match=msg):
ts.tz_localize(tz)

result = ts.tz_localize("US/Central", ambiguous=True)
assert result == expected0
assert result._creso == getattr(NpyDatetimeUnit, f"NPY_FR_{unit}").value
Expand Down