Skip to content

Commit 03db912

Browse files
mroeschke authored and Pingviinituutti committed
BUG/TST: Add more timezone fixtures and use is_utc more consistently (pandas-dev#23807)
1 parent 34ca1c3 commit 03db912

File tree

17 files changed

+78
-50
lines changed

17 files changed

+78
-50
lines changed

doc/source/whatsnew/v0.24.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,9 @@ Timezones
12491249
- Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`)
12501250
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`)
12511251
- Bug in :meth:`DataFrame.drop` and :meth:`Series.drop` when specifying a tz-aware Timestamp key to drop from a :class:`DatetimeIndex` with a DST transition (:issue:`21761`)
1252+
- Bug in :class:`DatetimeIndex` constructor where :class:`NaT` and ``dateutil.tz.tzlocal`` would raise an ``OutOfBoundsDatetime`` error (:issue:`23807`)
1253+
- Bug in :meth:`DatetimeIndex.tz_localize` and :meth:`Timestamp.tz_localize` with ``dateutil.tz.tzlocal`` near a DST transition that would return an incorrectly localized datetime (:issue:`23807`)
1254+
- Bug in :class:`Timestamp` constructor where a ``dateutil.tz.tzutc`` timezone passed with a ``datetime.datetime`` argument would be converted to a ``pytz.UTC`` timezone (:issue:`23807`)
12521255

12531256
Offsets
12541257
^^^^^^^

pandas/_libs/tslib.pyx

+3-2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ from tslibs.parsing import parse_datetime_string
3333

3434
from tslibs.timedeltas cimport cast_from_unit
3535
from tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info
36+
from tslibs.timezones import UTC
3637
from tslibs.conversion cimport (tz_convert_single, _TSObject,
3738
convert_datetime_to_tsobject,
3839
get_datetime64_nanos,
@@ -211,7 +212,7 @@ def _test_parse_iso8601(object ts):
211212
check_dts_bounds(&obj.dts)
212213
if out_local == 1:
213214
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
214-
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
215+
obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC)
215216
return Timestamp(obj.value, tz=obj.tzinfo)
216217
else:
217218
return Timestamp(obj.value)
@@ -673,7 +674,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
673674
# dateutil.tz.tzoffset objects
674675
out_tzoffset_vals.add(out_tzoffset * 60.)
675676
tz = pytz.FixedOffset(out_tzoffset)
676-
value = tz_convert_single(value, tz, 'UTC')
677+
value = tz_convert_single(value, tz, UTC)
677678
else:
678679
# Add a marker for naive string, to track if we are
679680
# parsing mixed naive and aware strings

pandas/_libs/tslibs/conversion.pyx

+29-18
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ from numpy cimport int64_t, int32_t, ndarray
88
cnp.import_array()
99

1010
import pytz
11+
from dateutil.tz import tzutc
1112

1213
# stdlib datetime imports
1314
from datetime import time as datetime_time
@@ -35,6 +36,7 @@ from timedeltas cimport cast_from_unit
3536
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
3637
get_utcoffset, get_dst_info,
3738
get_timezone, maybe_get_tz, tz_compare)
39+
from timezones import UTC
3840
from parsing import parse_datetime_string
3941

4042
from nattype import nat_strings, NaT
@@ -46,8 +48,6 @@ from nattype cimport NPY_NAT, checknull_with_nat
4648
NS_DTYPE = np.dtype('M8[ns]')
4749
TD_DTYPE = np.dtype('m8[ns]')
4850

49-
UTC = pytz.UTC
50-
5151

5252
# ----------------------------------------------------------------------
5353
# Misc Helpers
@@ -362,7 +362,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
362362
else:
363363
# UTC
364364
obj.value = pydatetime_to_dt64(ts, &obj.dts)
365-
obj.tzinfo = pytz.utc
365+
obj.tzinfo = tz
366366
else:
367367
obj.value = pydatetime_to_dt64(ts, &obj.dts)
368368
obj.tzinfo = ts.tzinfo
@@ -442,7 +442,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
442442
check_dts_bounds(&obj.dts)
443443
if out_local == 1:
444444
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
445-
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
445+
obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC)
446446
if tz is None:
447447
check_dts_bounds(&obj.dts)
448448
check_overflows(obj)
@@ -576,8 +576,6 @@ cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
576576
identically, i.e. discards nanos from Timestamps.
577577
It also assumes that the `tz` input is not None.
578578
"""
579-
if tz == 'UTC' or tz is UTC:
580-
return UTC.localize(dt)
581579
try:
582580
# datetime.replace with pytz may be incorrect result
583581
return tz.localize(dt)
@@ -603,8 +601,8 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz):
603601
elif not PyDateTime_CheckExact(dt):
604602
# i.e. is a Timestamp
605603
return dt.tz_localize(tz)
606-
elif tz == 'UTC' or tz is UTC:
607-
return UTC.localize(dt)
604+
elif is_utc(tz):
605+
return _localize_pydatetime(dt, tz)
608606
try:
609607
# datetime.replace with pytz may be incorrect result
610608
return tz.localize(dt)
@@ -642,15 +640,20 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
642640
int64_t[:] deltas
643641
int64_t v
644642

645-
trans, deltas, typ = get_dst_info(tz)
646-
if not to_utc:
647-
# We add `offset` below instead of subtracting it
648-
deltas = -1 * np.array(deltas, dtype='i8')
643+
if not is_tzlocal(tz):
644+
# get_dst_info cannot extract offsets from tzlocal because its
645+
# dependent on a datetime
646+
trans, deltas, typ = get_dst_info(tz)
647+
if not to_utc:
648+
# We add `offset` below instead of subtracting it
649+
deltas = -1 * np.array(deltas, dtype='i8')
649650

650651
for i in range(n):
651652
v = values[i]
652653
if v == NPY_NAT:
653654
result[i] = v
655+
elif is_tzlocal(tz):
656+
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=to_utc)
654657
else:
655658
# TODO: Is it more efficient to call searchsorted pointwise or
656659
# on `values` outside the loop? We are not consistent about this.
@@ -689,7 +692,12 @@ cdef inline int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz,
689692

690693
dt64_to_dtstruct(val, &dts)
691694
dt = datetime(dts.year, dts.month, dts.day, dts.hour,
692-
dts.min, dts.sec, dts.us, tz)
695+
dts.min, dts.sec, dts.us)
696+
# get_utcoffset (tz.utcoffset under the hood) only makes sense if datetime
697+
# is _wall time_, so if val is a UTC timestamp convert to wall time
698+
if not to_utc:
699+
dt = dt.replace(tzinfo=tzutc())
700+
dt = dt.astimezone(tz)
693701
delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000
694702

695703
if not to_utc:
@@ -735,21 +743,21 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
735743
int64_t arr[1]
736744

737745
# See GH#17734 We should always be converting either from UTC or to UTC
738-
assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC')
746+
assert is_utc(tz1) or is_utc(tz2)
739747

740748
if val == NPY_NAT:
741749
return val
742750

743751
# Convert to UTC
744752
if is_tzlocal(tz1):
745753
utc_date = _tz_convert_tzlocal_utc(val, tz1, to_utc=True)
746-
elif get_timezone(tz1) != 'UTC':
754+
elif not is_utc(get_timezone(tz1)):
747755
arr[0] = val
748756
utc_date = _tz_convert_dst(arr, tz1, to_utc=True)[0]
749757
else:
750758
utc_date = val
751759

752-
if get_timezone(tz2) == 'UTC':
760+
if is_utc(get_timezone(tz2)):
753761
return utc_date
754762
elif is_tzlocal(tz2):
755763
return _tz_convert_tzlocal_utc(utc_date, tz2, to_utc=False)
@@ -785,7 +793,7 @@ cdef inline int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz,
785793
Py_ssize_t i, n = len(vals)
786794
int64_t val
787795

788-
if get_timezone(tz) != 'UTC':
796+
if not is_utc(get_timezone(tz)):
789797
converted = np.empty(n, dtype=np.int64)
790798
if is_tzlocal(tz):
791799
for i in range(n):
@@ -890,7 +898,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
890898
if is_tzlocal(tz):
891899
for i in range(n):
892900
v = vals[i]
893-
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True)
901+
if v == NPY_NAT:
902+
result[i] = NPY_NAT
903+
else:
904+
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True)
894905
return result
895906

896907
if is_string_object(ambiguous):

pandas/_libs/tslibs/offsets.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ from conversion cimport tz_convert_single, pydt_to_i8, localize_pydatetime
2626
from nattype cimport NPY_NAT
2727
from np_datetime cimport (npy_datetimestruct,
2828
dtstruct_to_dt64, dt64_to_dtstruct)
29+
from timezones import UTC
2930

3031
# ---------------------------------------------------------------------
3132
# Constants
@@ -211,7 +212,7 @@ def _to_dt64(dt, dtype='datetime64'):
211212
# Thus astype is needed to cast datetime to datetime64[D]
212213
if getattr(dt, 'tzinfo', None) is not None:
213214
i8 = pydt_to_i8(dt)
214-
dt = tz_convert_single(i8, 'UTC', dt.tzinfo)
215+
dt = tz_convert_single(i8, UTC, dt.tzinfo)
215216
dt = np.int64(dt).astype('datetime64[ns]')
216217
else:
217218
dt = np.datetime64(dt)

pandas/_libs/tslibs/timestamps.pyx

+5-4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ from timedeltas import Timedelta
3636
from timedeltas cimport delta_to_nanoseconds
3737
from timezones cimport (
3838
get_timezone, is_utc, maybe_get_tz, treat_tz_as_pytz, tz_compare)
39+
from timezones import UTC
3940

4041
# ----------------------------------------------------------------------
4142
# Constants
@@ -416,7 +417,7 @@ cdef class _Timestamp(datetime):
416417
int64_t val
417418
val = self.value
418419
if self.tz is not None and not is_utc(self.tz):
419-
val = tz_convert_single(self.value, 'UTC', self.tz)
420+
val = tz_convert_single(self.value, UTC, self.tz)
420421
return val
421422

422423
cpdef bint _get_start_end_field(self, str field):
@@ -633,7 +634,7 @@ class Timestamp(_Timestamp):
633634
634635
Return a new Timestamp representing UTC day and time.
635636
"""
636-
return cls.now('UTC')
637+
return cls.now(UTC)
637638

638639
@classmethod
639640
def utcfromtimestamp(cls, ts):
@@ -1108,7 +1109,7 @@ class Timestamp(_Timestamp):
11081109
else:
11091110
if tz is None:
11101111
# reset tz
1111-
value = tz_convert_single(self.value, 'UTC', self.tz)
1112+
value = tz_convert_single(self.value, UTC, self.tz)
11121113
return Timestamp(value, tz=None)
11131114
else:
11141115
raise TypeError('Cannot localize tz-aware Timestamp, use '
@@ -1178,7 +1179,7 @@ class Timestamp(_Timestamp):
11781179
_tzinfo = self.tzinfo
11791180
value = self.value
11801181
if _tzinfo is not None:
1181-
value_tz = tz_convert_single(value, _tzinfo, 'UTC')
1182+
value_tz = tz_convert_single(value, _tzinfo, UTC)
11821183
value += value - value_tz
11831184

11841185
# setup components

pandas/_libs/tslibs/timezones.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ cpdef inline object get_timezone(object tz):
5858
UJSON/pytables. maybe_get_tz (below) is the inverse of this process.
5959
"""
6060
if is_utc(tz):
61-
return 'UTC'
61+
return tz
6262
else:
6363
if treat_tz_as_dateutil(tz):
6464
if '.tar.gz' in tz._filename:

pandas/conftest.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import importlib
22
import os
33

4-
from dateutil.tz import tzutc
4+
from dateutil.tz import tzlocal, tzutc
55
import hypothesis
66
from hypothesis import strategies as st
77
import numpy as np
88
import pytest
9-
from pytz import utc
9+
from pytz import FixedOffset, utc
1010

1111
from pandas.compat import PY3
1212
import pandas.util._test_decorators as td
@@ -245,7 +245,7 @@ def datetime_tz_utc():
245245
return timezone.utc
246246

247247

248-
utc_objs = ['utc', utc, tzutc()]
248+
utc_objs = ['utc', 'dateutil/UTC', utc, tzutc()]
249249
if PY3:
250250
from datetime import timezone
251251
utc_objs.append(timezone.utc)
@@ -354,7 +354,8 @@ def unique_nulls_fixture(request):
354354

355355

356356
TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific',
357-
'dateutil/Asia/Singapore']
357+
'dateutil/Asia/Singapore', tzutc(), tzlocal(), FixedOffset(300),
358+
FixedOffset(0), FixedOffset(-300)]
358359

359360

360361
@td.parametrize_fixture_doc(str(TIMEZONES))

pandas/core/arrays/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,8 @@ def tz_localize(self, tz, ambiguous='raise', nonexistent='raise',
790790

791791
if self.tz is not None:
792792
if tz is None:
793-
new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz)
793+
new_dates = conversion.tz_convert(self.asi8, timezones.UTC,
794+
self.tz)
794795
else:
795796
raise TypeError("Already tz-aware, use tz_convert to convert.")
796797
else:

pandas/core/dtypes/dtypes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import numpy as np
66

77
from pandas._libs.interval import Interval
8-
from pandas._libs.tslibs import NaT, Period, Timestamp
8+
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones
99

1010
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCIndexClass
1111

@@ -516,7 +516,7 @@ def __new__(cls, unit=None, tz=None):
516516
m = cls._match.search(unit)
517517
if m is not None:
518518
unit = m.groupdict()['unit']
519-
tz = m.groupdict()['tz']
519+
tz = timezones.maybe_get_tz(m.groupdict()['tz'])
520520
except TypeError:
521521
raise ValueError("could not construct DatetimeTZDtype")
522522

pandas/tests/indexes/datetimes/test_construction.py

+7
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from functools import partial
33
from operator import attrgetter
44

5+
import dateutil
56
import numpy as np
67
import pytest
78
import pytz
@@ -527,6 +528,12 @@ def test_construction_with_tz_and_tz_aware_dti(self):
527528
with pytest.raises(TypeError):
528529
DatetimeIndex(dti, tz='Asia/Tokyo')
529530

531+
def test_construction_with_nat_and_tzlocal(self):
532+
tz = dateutil.tz.tzlocal()
533+
result = DatetimeIndex(['2018', 'NaT'], tz=tz)
534+
expected = DatetimeIndex([Timestamp('2018', tz=tz), pd.NaT])
535+
tm.assert_index_equal(result, expected)
536+
530537

531538
class TestTimeSeries(object):
532539

pandas/tests/indexes/datetimes/test_timezones.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -774,10 +774,7 @@ def test_time_accessor(self, dtype):
774774

775775
def test_timetz_accessor(self, tz_naive_fixture):
776776
# GH21358
777-
if tz_naive_fixture is not None:
778-
tz = dateutil.tz.gettz(tz_naive_fixture)
779-
else:
780-
tz = None
777+
tz = timezones.maybe_get_tz(tz_naive_fixture)
781778

782779
expected = np.array([time(10, 20, 30, tzinfo=tz), pd.NaT])
783780

pandas/tests/scalar/timestamp/test_timestamp.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,11 @@ def test_depreciate_tz_and_tzinfo_in_datetime_input(self, box):
589589
with tm.assert_produces_warning(FutureWarning):
590590
Timestamp(box(**kwargs), tz='US/Pacific')
591591

592+
def test_dont_convert_dateutil_utc_to_pytz_utc(self):
593+
result = Timestamp(datetime(2018, 1, 1), tz=tzutc())
594+
expected = Timestamp(datetime(2018, 1, 1)).tz_localize(tzutc())
595+
assert result == expected
596+
592597

593598
class TestTimestamp(object):
594599

@@ -612,7 +617,7 @@ def test_tz(self):
612617
assert conv.hour == 19
613618

614619
def test_utc_z_designator(self):
615-
assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC'
620+
assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) is utc
616621

617622
def test_asm8(self):
618623
np.random.seed(7960929)

pandas/tests/scalar/timestamp/test_timezones.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import pytz
1212
from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError
1313

14+
from pandas._libs.tslibs import timezones
1415
from pandas.errors import OutOfBoundsDatetime
1516
import pandas.util._test_decorators as td
1617

@@ -342,10 +343,7 @@ def test_timestamp_add_timedelta_push_over_dst_boundary(self, tz):
342343
def test_timestamp_timetz_equivalent_with_datetime_tz(self,
343344
tz_naive_fixture):
344345
# GH21358
345-
if tz_naive_fixture is not None:
346-
tz = dateutil.tz.gettz(tz_naive_fixture)
347-
else:
348-
tz = None
346+
tz = timezones.maybe_get_tz(tz_naive_fixture)
349347

350348
stamp = Timestamp('2018-06-04 10:20:30', tz=tz)
351349
_datetime = datetime(2018, 6, 4, hour=10,

pandas/tests/series/test_analytics.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ def test_describe(self):
338338
def test_describe_with_tz(self, tz_naive_fixture):
339339
# GH 21332
340340
tz = tz_naive_fixture
341-
name = tz_naive_fixture
341+
name = str(tz_naive_fixture)
342342
start = Timestamp(2018, 1, 1)
343343
end = Timestamp(2018, 1, 5)
344344
s = Series(date_range(start, end, tz=tz), name=name)

0 commit comments

Comments
 (0)