-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG/TST: Add more timezone fixtures and use is_utc more consistently #23807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5cf8073
7ba3649
b8bb053
5662f3a
2803041
c5e11a6
fbc50da
cff6e70
84d8a12
f351cd2
5b97f0d
8164688
069e6e1
de3c83b
ead3f92
fb6e96c
555afaa
9857897
00e31d4
c647ba0
c219ea3
44a33ab
a95af1a
9828aa5
15ba5fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ from numpy cimport int64_t, int32_t, ndarray | |
cnp.import_array() | ||
|
||
import pytz | ||
from dateutil.tz import tzutc | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you switching this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No switching, this is just used here for timezone conversion between tzlocal and tzutc (making sure they are both dateutil timezones so they play nice) https://github.com/pandas-dev/pandas/pull/23807/files/de3c83bb946a7791d29a18db5f004228b24c9b02#diff-66d289312fd0b02e7721bf45fb281797R705 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
|
||
# stdlib datetime imports | ||
from datetime import time as datetime_time | ||
|
@@ -35,6 +36,7 @@ from timedeltas cimport cast_from_unit | |
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, | ||
get_utcoffset, get_dst_info, | ||
get_timezone, maybe_get_tz, tz_compare) | ||
from timezones import UTC | ||
from parsing import parse_datetime_string | ||
|
||
from nattype import nat_strings, NaT | ||
|
@@ -46,8 +48,6 @@ from nattype cimport NPY_NAT, checknull_with_nat | |
NS_DTYPE = np.dtype('M8[ns]') | ||
TD_DTYPE = np.dtype('m8[ns]') | ||
|
||
UTC = pytz.UTC | ||
|
||
|
||
# ---------------------------------------------------------------------- | ||
# Misc Helpers | ||
|
@@ -362,7 +362,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, | |
else: | ||
# UTC | ||
obj.value = pydatetime_to_dt64(ts, &obj.dts) | ||
obj.tzinfo = pytz.utc | ||
obj.tzinfo = tz | ||
else: | ||
obj.value = pydatetime_to_dt64(ts, &obj.dts) | ||
obj.tzinfo = ts.tzinfo | ||
|
@@ -442,7 +442,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, | |
check_dts_bounds(&obj.dts) | ||
if out_local == 1: | ||
obj.tzinfo = pytz.FixedOffset(out_tzoffset) | ||
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') | ||
obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) | ||
if tz is None: | ||
check_dts_bounds(&obj.dts) | ||
check_overflows(obj) | ||
|
@@ -576,8 +576,6 @@ cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): | |
identically, i.e. discards nanos from Timestamps. | ||
It also assumes that the `tz` input is not None. | ||
""" | ||
if tz == 'UTC' or tz is UTC: | ||
return UTC.localize(dt) | ||
try: | ||
# datetime.replace with pytz may be incorrect result | ||
return tz.localize(dt) | ||
|
@@ -603,8 +601,8 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz): | |
elif not PyDateTime_CheckExact(dt): | ||
# i.e. is a Timestamp | ||
return dt.tz_localize(tz) | ||
elif tz == 'UTC' or tz is UTC: | ||
return UTC.localize(dt) | ||
elif is_utc(tz): | ||
return _localize_pydatetime(dt, tz) | ||
try: | ||
# datetime.replace with pytz may be incorrect result | ||
return tz.localize(dt) | ||
|
@@ -642,15 +640,20 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, | |
int64_t[:] deltas | ||
int64_t v | ||
|
||
trans, deltas, typ = get_dst_info(tz) | ||
if not to_utc: | ||
# We add `offset` below instead of subtracting it | ||
deltas = -1 * np.array(deltas, dtype='i8') | ||
if not is_tzlocal(tz): | ||
# get_dst_info cannot extract offsets from tzlocal because its | ||
# dependent on a datetime | ||
trans, deltas, typ = get_dst_info(tz) | ||
if not to_utc: | ||
# We add `offset` below instead of subtracting it | ||
deltas = -1 * np.array(deltas, dtype='i8') | ||
|
||
for i in range(n): | ||
v = values[i] | ||
if v == NPY_NAT: | ||
result[i] = v | ||
elif is_tzlocal(tz): | ||
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=to_utc) | ||
else: | ||
# TODO: Is it more efficient to call searchsorted pointwise or | ||
# on `values` outside the loop? We are not consistent about this. | ||
|
@@ -689,7 +692,12 @@ cdef inline int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, | |
|
||
dt64_to_dtstruct(val, &dts) | ||
dt = datetime(dts.year, dts.month, dts.day, dts.hour, | ||
dts.min, dts.sec, dts.us, tz) | ||
dts.min, dts.sec, dts.us) | ||
# get_utcoffset (tz.utcoffset under the hood) only makes sense if datetime | ||
# is _wall time_, so if val is a UTC timestamp convert to wall time | ||
if not to_utc: | ||
dt = dt.replace(tzinfo=tzutc()) | ||
dt = dt.astimezone(tz) | ||
delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 | ||
|
||
if not to_utc: | ||
|
@@ -735,21 +743,21 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): | |
int64_t arr[1] | ||
|
||
# See GH#17734 We should always be converting either from UTC or to UTC | ||
assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC') | ||
assert is_utc(tz1) or is_utc(tz2) | ||
|
||
if val == NPY_NAT: | ||
return val | ||
|
||
# Convert to UTC | ||
if is_tzlocal(tz1): | ||
utc_date = _tz_convert_tzlocal_utc(val, tz1, to_utc=True) | ||
elif get_timezone(tz1) != 'UTC': | ||
elif not is_utc(get_timezone(tz1)): | ||
arr[0] = val | ||
utc_date = _tz_convert_dst(arr, tz1, to_utc=True)[0] | ||
else: | ||
utc_date = val | ||
|
||
if get_timezone(tz2) == 'UTC': | ||
if is_utc(get_timezone(tz2)): | ||
return utc_date | ||
elif is_tzlocal(tz2): | ||
return _tz_convert_tzlocal_utc(utc_date, tz2, to_utc=False) | ||
|
@@ -785,7 +793,7 @@ cdef inline int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, | |
Py_ssize_t i, n = len(vals) | ||
int64_t val | ||
|
||
if get_timezone(tz) != 'UTC': | ||
if not is_utc(get_timezone(tz)): | ||
converted = np.empty(n, dtype=np.int64) | ||
if is_tzlocal(tz): | ||
for i in range(n): | ||
|
@@ -890,7 +898,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
if is_tzlocal(tz): | ||
for i in range(n): | ||
v = vals[i] | ||
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True) | ||
if v == NPY_NAT: | ||
result[i] = NPY_NAT | ||
else: | ||
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True) | ||
return result | ||
|
||
if is_string_object(ambiguous): | ||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -1,12 +1,12 @@ | ||||
import importlib | ||||
import os | ||||
|
||||
from dateutil.tz import tzutc | ||||
from dateutil.tz import tzlocal, tzutc | ||||
import hypothesis | ||||
from hypothesis import strategies as st | ||||
import numpy as np | ||||
import pytest | ||||
from pytz import utc | ||||
from pytz import FixedOffset, utc | ||||
|
||||
from pandas.compat import PY3 | ||||
import pandas.util._test_decorators as td | ||||
|
@@ -245,7 +245,7 @@ def datetime_tz_utc(): | |||
return timezone.utc | ||||
|
||||
|
||||
utc_objs = ['utc', utc, tzutc()] | ||||
utc_objs = ['utc', 'dateutil/UTC', utc, tzutc()] | ||||
if PY3: | ||||
from datetime import timezone | ||||
utc_objs.append(timezone.utc) | ||||
|
@@ -354,7 +354,8 @@ def unique_nulls_fixture(request): | |||
|
||||
|
||||
TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific', | ||||
'dateutil/Asia/Singapore'] | ||||
'dateutil/Asia/Singapore', tzutc(), tzlocal(), FixedOffset(300), | ||||
FixedOffset(0), FixedOffset(-300)] | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Brainstorm:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how much additional time do the tests take with these added fixtures? how many more tests There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can add these in a follow up and further discuss. |
||||
|
||||
|
||||
@td.parametrize_fixture_doc(str(TIMEZONES)) | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nitpick: I'm guessing this happens for users for whom tzlocal is UTC-N but not those for whom tzlocal is UTC+N (@jorisvandenbossche can you check this when convenient?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was actually because
NaT
wasn't handled intz_localize_to_utc
soNaT
was converted to its i8 value which was less thanTimestamp.min
https://github.com/pandas-dev/pandas/pull/23807/files#diff-66d289312fd0b02e7721bf45fb281797R911