diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4ff3cc728f7f7..48a3bfdab62c9 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1249,6 +1249,9 @@ Timezones - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`) - Bug in :meth:`DataFrame.drop` and :meth:`Series.drop` when specifying a tz-aware Timestamp key to drop from a :class:`DatetimeIndex` with a DST transition (:issue:`21761`) +- Bug in :class:`DatetimeIndex` constructor where :class:`NaT` and ``dateutil.tz.tzlocal`` would raise an ``OutOfBoundsDatetime`` error (:issue:`23807`) +- Bug in :meth:`DatetimeIndex.tz_localize` and :meth:`Timestamp.tz_localize` with ``dateutil.tz.tzlocal`` near a DST transition that would return an incorrectly localized datetime (:issue:`23807`) +- Bug in :class:`Timestamp` constructor where a ``dateutil.tz.tzutc`` timezone passed with a ``datetime.datetime`` argument would be converted to a ``pytz.UTC`` timezone (:issue:`23807`) Offsets ^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index e346eb7e598ed..c5bc969ede3c9 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -33,6 +33,7 @@ from tslibs.parsing import parse_datetime_string from tslibs.timedeltas cimport cast_from_unit from tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info +from tslibs.timezones import UTC from tslibs.conversion cimport (tz_convert_single, _TSObject, convert_datetime_to_tsobject, get_datetime64_nanos, @@ -211,7 +212,7 @@ def _test_parse_iso8601(object ts): check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') + obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) return Timestamp(obj.value, tz=obj.tzinfo) else: return Timestamp(obj.value) @@ -673,7 +674,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # dateutil.tz.tzoffset objects out_tzoffset_vals.add(out_tzoffset * 60.) tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, 'UTC') + value = tz_convert_single(value, tz, UTC) else: # Add a marker for naive string, to track if we are # parsing mixed naive and aware strings diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 28b68b9ee1925..b4a13d3d0ada9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -8,6 +8,7 @@ from numpy cimport int64_t, int32_t, ndarray cnp.import_array() import pytz +from dateutil.tz import tzutc # stdlib datetime imports from datetime import time as datetime_time @@ -35,6 +36,7 @@ from timedeltas cimport cast_from_unit from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info, get_timezone, maybe_get_tz, tz_compare) +from timezones import UTC from parsing import parse_datetime_string from nattype import nat_strings, NaT @@ -46,8 +48,6 @@ from nattype cimport NPY_NAT, checknull_with_nat NS_DTYPE = np.dtype('M8[ns]') TD_DTYPE = np.dtype('m8[ns]') -UTC = pytz.UTC - # ---------------------------------------------------------------------- # Misc Helpers @@ -362,7 +362,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, else: # UTC obj.value = pydatetime_to_dt64(ts, &obj.dts) - obj.tzinfo = pytz.utc + obj.tzinfo = tz else: obj.value = pydatetime_to_dt64(ts, &obj.dts) obj.tzinfo = ts.tzinfo @@ -442,7 +442,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, check_dts_bounds(&obj.dts) if out_local == 1: obj.tzinfo = pytz.FixedOffset(out_tzoffset) - obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC') + obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) if tz is None: check_dts_bounds(&obj.dts) check_overflows(obj) @@ -576,8 +576,6 @@ cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): identically, i.e. discards nanos from Timestamps. It also assumes that the `tz` input is not None. """ - if tz == 'UTC' or tz is UTC: - return UTC.localize(dt) try: # datetime.replace with pytz may be incorrect result return tz.localize(dt) @@ -603,8 +601,8 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz): elif not PyDateTime_CheckExact(dt): # i.e. is a Timestamp return dt.tz_localize(tz) - elif tz == 'UTC' or tz is UTC: - return UTC.localize(dt) + elif is_utc(tz): + return _localize_pydatetime(dt, tz) try: # datetime.replace with pytz may be incorrect result return tz.localize(dt) @@ -642,15 +640,20 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, int64_t[:] deltas int64_t v - trans, deltas, typ = get_dst_info(tz) - if not to_utc: - # We add `offset` below instead of subtracting it - deltas = -1 * np.array(deltas, dtype='i8') + if not is_tzlocal(tz): + # get_dst_info cannot extract offsets from tzlocal because its + # dependent on a datetime + trans, deltas, typ = get_dst_info(tz) + if not to_utc: + # We add `offset` below instead of subtracting it + deltas = -1 * np.array(deltas, dtype='i8') for i in range(n): v = values[i] if v == NPY_NAT: result[i] = v + elif is_tzlocal(tz): + result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=to_utc) else: # TODO: Is it more efficient to call searchsorted pointwise or # on `values` outside the loop? We are not consistent about this. @@ -689,7 +692,12 @@ cdef inline int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, dt64_to_dtstruct(val, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) + dts.min, dts.sec, dts.us) + # get_utcoffset (tz.utcoffset under the hood) only makes sense if datetime + # is _wall time_, so if val is a UTC timestamp convert to wall time + if not to_utc: + dt = dt.replace(tzinfo=tzutc()) + dt = dt.astimezone(tz) delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 if not to_utc: @@ -735,7 +743,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): int64_t arr[1] # See GH#17734 We should always be converting either from UTC or to UTC - assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC') + assert is_utc(tz1) or is_utc(tz2) if val == NPY_NAT: return val @@ -743,13 +751,13 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): # Convert to UTC if is_tzlocal(tz1): utc_date = _tz_convert_tzlocal_utc(val, tz1, to_utc=True) - elif get_timezone(tz1) != 'UTC': + elif not is_utc(get_timezone(tz1)): arr[0] = val utc_date = _tz_convert_dst(arr, tz1, to_utc=True)[0] else: utc_date = val - if get_timezone(tz2) == 'UTC': + if is_utc(get_timezone(tz2)): return utc_date elif is_tzlocal(tz2): return _tz_convert_tzlocal_utc(utc_date, tz2, to_utc=False) @@ -785,7 +793,7 @@ cdef inline int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, Py_ssize_t i, n = len(vals) int64_t val - if get_timezone(tz) != 'UTC': + if not is_utc(get_timezone(tz)): converted = np.empty(n, dtype=np.int64) if is_tzlocal(tz): for i in range(n): @@ -890,7 +898,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if is_tzlocal(tz): for i in range(n): v = vals[i] - result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True) + if v == NPY_NAT: + result[i] = NPY_NAT + else: + result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True) return result if is_string_object(ambiguous): diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 7ef38cba0c37f..f3ac102bf177e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -26,6 +26,7 @@ from conversion cimport tz_convert_single, pydt_to_i8, localize_pydatetime from nattype cimport NPY_NAT from np_datetime cimport (npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct) +from timezones import UTC # --------------------------------------------------------------------- # Constants @@ -211,7 +212,7 @@ def _to_dt64(dt, dtype='datetime64'): # Thus astype is needed to cast datetime to datetime64[D] if getattr(dt, 'tzinfo', None) is not None: i8 = pydt_to_i8(dt) - dt = tz_convert_single(i8, 'UTC', dt.tzinfo) + dt = tz_convert_single(i8, UTC, dt.tzinfo) dt = np.int64(dt).astype('datetime64[ns]') else: dt = np.datetime64(dt) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f6a6257f92e7c..bb7a9a57b8a75 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -36,6 +36,7 @@ from timedeltas import Timedelta from timedeltas cimport delta_to_nanoseconds from timezones cimport ( get_timezone, is_utc, maybe_get_tz, treat_tz_as_pytz, tz_compare) +from timezones import UTC # ---------------------------------------------------------------------- # Constants @@ -416,7 +417,7 @@ cdef class _Timestamp(datetime): int64_t val val = self.value if self.tz is not None and not is_utc(self.tz): - val = tz_convert_single(self.value, 'UTC', self.tz) + val = tz_convert_single(self.value, UTC, self.tz) return val cpdef bint _get_start_end_field(self, str field): @@ -633,7 +634,7 @@ class Timestamp(_Timestamp): Return a new Timestamp representing UTC day and time. """ - return cls.now('UTC') + return cls.now(UTC) @classmethod def utcfromtimestamp(cls, ts): @@ -1108,7 +1109,7 @@ class Timestamp(_Timestamp): else: if tz is None: # reset tz - value = tz_convert_single(self.value, 'UTC', self.tz) + value = tz_convert_single(self.value, UTC, self.tz) return Timestamp(value, tz=None) else: raise TypeError('Cannot localize tz-aware Timestamp, use ' @@ -1178,7 +1179,7 @@ class Timestamp(_Timestamp): _tzinfo = self.tzinfo value = self.value if _tzinfo is not None: - value_tz = tz_convert_single(value, _tzinfo, 'UTC') + value_tz = tz_convert_single(value, _tzinfo, UTC) value += value - value_tz # setup components diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index a2a40a8aa1ca4..5fa8a45af3083 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -58,7 +58,7 @@ cpdef inline object get_timezone(object tz): UJSON/pytables. maybe_get_tz (below) is the inverse of this process. """ if is_utc(tz): - return 'UTC' + return tz else: if treat_tz_as_dateutil(tz): if '.tar.gz' in tz._filename: diff --git a/pandas/conftest.py b/pandas/conftest.py index 479471332a274..f450193d9388e 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,12 +1,12 @@ import importlib import os -from dateutil.tz import tzutc +from dateutil.tz import tzlocal, tzutc import hypothesis from hypothesis import strategies as st import numpy as np import pytest -from pytz import utc +from pytz import FixedOffset, utc from pandas.compat import PY3 import pandas.util._test_decorators as td @@ -245,7 +245,7 @@ def datetime_tz_utc(): return timezone.utc -utc_objs = ['utc', utc, tzutc()] +utc_objs = ['utc', 'dateutil/UTC', utc, tzutc()] if PY3: from datetime import timezone utc_objs.append(timezone.utc) @@ -354,7 +354,8 @@ def unique_nulls_fixture(request): TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific', - 'dateutil/Asia/Singapore'] + 'dateutil/Asia/Singapore', tzutc(), tzlocal(), FixedOffset(300), + FixedOffset(0), FixedOffset(-300)] @td.parametrize_fixture_doc(str(TIMEZONES)) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 2187ff28844a0..cdc4f7e61e7ca 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -790,7 +790,8 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', if self.tz is not None: if tz is None: - new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz) + new_dates = conversion.tz_convert(self.asi8, timezones.UTC, + self.tz) else: raise TypeError("Already tz-aware, use tz_convert to convert.") else: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 4dfefdec031b2..bd7c6630c7c5d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs.interval import Interval -from pandas._libs.tslibs import NaT, Period, Timestamp +from pandas._libs.tslibs import NaT, Period, Timestamp, timezones from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCIndexClass @@ -516,7 +516,7 @@ def __new__(cls, unit=None, tz=None): m = cls._match.search(unit) if m is not None: unit = m.groupdict()['unit'] - tz = m.groupdict()['tz'] + tz = timezones.maybe_get_tz(m.groupdict()['tz']) except TypeError: raise ValueError("could not construct DatetimeTZDtype") diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 4b2c07af6af68..02755c7e58a1d 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -2,6 +2,7 @@ from functools import partial from operator import attrgetter +import dateutil import numpy as np import pytest import pytz @@ -527,6 +528,12 @@ def test_construction_with_tz_and_tz_aware_dti(self): with pytest.raises(TypeError): DatetimeIndex(dti, tz='Asia/Tokyo') + def test_construction_with_nat_and_tzlocal(self): + tz = dateutil.tz.tzlocal() + result = DatetimeIndex(['2018', 'NaT'], tz=tz) + expected = DatetimeIndex([Timestamp('2018', tz=tz), pd.NaT]) + tm.assert_index_equal(result, expected) + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 9ad540b174438..52f4f3c309038 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -774,10 +774,7 @@ def test_time_accessor(self, dtype): def test_timetz_accessor(self, tz_naive_fixture): # GH21358 - if tz_naive_fixture is not None: - tz = dateutil.tz.gettz(tz_naive_fixture) - else: - tz = None + tz = timezones.maybe_get_tz(tz_naive_fixture) expected = np.array([time(10, 20, 30, tzinfo=tz), pd.NaT]) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 2d5c8f77dd338..b2c05d1564a48 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -589,6 +589,11 @@ def test_depreciate_tz_and_tzinfo_in_datetime_input(self, box): with tm.assert_produces_warning(FutureWarning): Timestamp(box(**kwargs), tz='US/Pacific') + def test_dont_convert_dateutil_utc_to_pytz_utc(self): + result = Timestamp(datetime(2018, 1, 1), tz=tzutc()) + expected = Timestamp(datetime(2018, 1, 1)).tz_localize(tzutc()) + assert result == expected + class TestTimestamp(object): @@ -612,7 +617,7 @@ def test_tz(self): assert conv.hour == 19 def test_utc_z_designator(self): - assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC' + assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) is utc def test_asm8(self): np.random.seed(7960929) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 72e4fd42ae15a..c02dc1083c366 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -11,6 +11,7 @@ import pytz from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError +from pandas._libs.tslibs import timezones from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td @@ -342,10 +343,7 @@ def test_timestamp_add_timedelta_push_over_dst_boundary(self, tz): def test_timestamp_timetz_equivalent_with_datetime_tz(self, tz_naive_fixture): # GH21358 - if tz_naive_fixture is not None: - tz = dateutil.tz.gettz(tz_naive_fixture) - else: - tz = None + tz = timezones.maybe_get_tz(tz_naive_fixture) stamp = Timestamp('2018-06-04 10:20:30', tz=tz) _datetime = datetime(2018, 6, 4, hour=10, diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a5a7cc2217864..e01d91661c2e6 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -338,7 +338,7 @@ def test_describe(self): def test_describe_with_tz(self, tz_naive_fixture): # GH 21332 tz = tz_naive_fixture - name = tz_naive_fixture + name = str(tz_naive_fixture) start = Timestamp(2018, 1, 1) end = Timestamp(2018, 1, 5) s = Series(date_range(start, end, tz=tz), name=name) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index de36c0bb2f789..fde1d1718a2a2 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -2,6 +2,7 @@ import numpy as np import pytest +from pytz import UTC from pandas._libs.tslib import iNaT from pandas._libs.tslibs import conversion, timezones @@ -11,15 +12,15 @@ def compare_utc_to_local(tz_didx, utc_didx): - f = lambda x: conversion.tz_convert_single(x, 'UTC', tz_didx.tz) - result = conversion.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) + f = lambda x: conversion.tz_convert_single(x, UTC, tz_didx.tz) + result = conversion.tz_convert(tz_didx.asi8, UTC, tz_didx.tz) result_single = np.vectorize(f)(tz_didx.asi8) tm.assert_numpy_array_equal(result, result_single) def compare_local_to_utc(tz_didx, utc_didx): - f = lambda x: conversion.tz_convert_single(x, tz_didx.tz, 'UTC') - result = conversion.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') + f = lambda x: conversion.tz_convert_single(x, tz_didx.tz, UTC) + result = conversion.tz_convert(utc_didx.asi8, tz_didx.tz, UTC) result_single = np.vectorize(f)(utc_didx.asi8) tm.assert_numpy_array_equal(result, result_single) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 95904fab05322..97ef91a02dfb8 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -17,6 +17,7 @@ from pandas._libs.tslibs.offsets import _offset_to_period_map # noqa:E402 import pandas._libs.tslibs.resolution as libresolution from pandas._libs.tslibs.resolution import Resolution +from pandas._libs.tslibs.timezones import UTC import pandas.compat as compat from pandas.compat import zip from pandas.util._decorators import cache_readonly @@ -287,7 +288,7 @@ def __init__(self, index, warn=True): # the timezone so they are in local time if hasattr(index, 'tz'): if index.tz is not None: - self.values = tz_convert(self.values, 'UTC', index.tz) + self.values = tz_convert(self.values, UTC, index.tz) self.warn = warn diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ca81b3bcfef2a..fedcf46277515 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -9,7 +9,7 @@ from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, Timedelta, Timestamp, ccalendar, conversion, delta_to_nanoseconds, frequencies as libfrequencies, normalize_date, - offsets as liboffsets) + offsets as liboffsets, timezones) from pandas._libs.tslibs.offsets import ( ApplyTypeError, BaseOffset, _get_calendar, _is_normalized, _to_dt64, apply_index_wraps, as_datetime, roll_yearday, shift_month) @@ -81,7 +81,7 @@ def wrapper(self, other): if result.tz is not None: # convert to UTC value = conversion.tz_convert_single( - result.value, 'UTC', result.tz) + result.value, timezones.UTC, result.tz) else: value = result.value result = Timestamp(value + nano)