Skip to content

Commit 5a372d8

Browse files
authored
API: default to stdlib timezone objects for fixed-offsets (pandas-dev#49677)
* API: default to stdlib timezone objects for fixed-offsets * update docstrings * flesh out whatsnew * handle strings * skip on windows
1 parent 5ee4dac commit 5a372d8

26 files changed

+175
-94
lines changed

doc/source/whatsnew/v2.0.0.rst

+34
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,40 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or
312312
ser.astype("timedelta64[s]")
313313
ser.astype("timedelta64[D]")
314314
315+
.. _whatsnew_200.api_breaking.default_to_stdlib_tzinfos:
316+
317+
UTC and fixed-offset timezones default to standard-library tzinfo objects
318+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
319+
In previous versions, the default ``tzinfo`` object used to represent UTC
320+
was ``pytz.UTC``. In pandas 2.0, we default to ``datetime.timezone.utc`` instead.
321+
Similarly, for timezones that represent fixed UTC offsets, we use ``datetime.timezone``
322+
objects instead of ``pytz.FixedOffset`` objects (:issue:`34916`).
323+
324+
*Previous behavior*:
325+
326+
.. code-block:: ipython
327+
328+
In [2]: ts = pd.Timestamp("2016-01-01", tz="UTC")
329+
In [3]: type(ts.tzinfo)
330+
Out[3]: pytz.UTC
331+
332+
In [4]: ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
333+
In [5]: type(ts2.tzinfo)
334+
Out[5]: pytz._FixedOffset
335+
336+
*New behavior*:
337+
338+
.. ipython:: python
339+
340+
ts = pd.Timestamp("2016-01-01", tz="UTC")
341+
type(ts.tzinfo)
342+
343+
ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00")
344+
type(ts2.tzinfo)
345+
346+
For timezones that are neither UTC nor fixed offsets, e.g. "US/Pacific", we
347+
continue to default to ``pytz`` objects.
348+
315349
.. _whatsnew_200.api_breaking.zero_len_indexes:
316350

317351
Empty DataFrames/Series will now default to have a ``RangeIndex``

pandas/_libs/tslib.pyx

+8-6
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
cimport cython
2+
3+
from datetime import timezone
4+
25
from cpython.datetime cimport (
36
PyDate_Check,
47
PyDateTime_Check,
58
datetime,
69
import_datetime,
10+
timedelta,
711
tzinfo,
812
)
913
from cpython.object cimport PyObject
@@ -23,8 +27,6 @@ import numpy as np
2327

2428
cnp.import_array()
2529

26-
import pytz
27-
2830
from pandas._libs.tslibs.np_datetime cimport (
2931
NPY_DATETIMEUNIT,
3032
NPY_FR_ns,
@@ -95,7 +97,7 @@ def _test_parse_iso8601(ts: str):
9597
obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts)
9698
check_dts_bounds(&obj.dts)
9799
if out_local == 1:
98-
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
100+
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
99101
obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo)
100102
return Timestamp(obj.value, tz=obj.tzinfo)
101103
else:
@@ -460,7 +462,7 @@ cpdef array_to_datetime(
460462
2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
461463
is encountered
462464
463-
Also returns a pytz.FixedOffset if an array of strings with the same
465+
Also returns a fixed-offset tzinfo object if an array of strings with the same
464466
timezone offset is passed and utc=True is not passed. Otherwise, None
465467
is returned
466468
@@ -650,7 +652,7 @@ cpdef array_to_datetime(
650652
# since we store the total_seconds of
651653
# dateutil.tz.tzoffset objects
652654
out_tzoffset_vals.add(out_tzoffset * 60.)
653-
tz = pytz.FixedOffset(out_tzoffset)
655+
tz = timezone(timedelta(minutes=out_tzoffset))
654656
value = tz_localize_to_utc_single(value, tz)
655657
out_local = 0
656658
out_tzoffset = 0
@@ -718,7 +720,7 @@ cpdef array_to_datetime(
718720
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
719721
else:
720722
tz_offset = out_tzoffset_vals.pop()
721-
tz_out = pytz.FixedOffset(tz_offset / 60.)
723+
tz_out = timezone(timedelta(seconds=tz_offset))
722724
return result, tz_out
723725

724726

pandas/_libs/tslibs/conversion.pyx

+4-3
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,17 @@ from numpy cimport (
88

99
cnp.import_array()
1010

11-
import pytz
12-
1311
# stdlib datetime imports
1412

13+
from datetime import timezone
14+
1515
from cpython.datetime cimport (
1616
PyDate_Check,
1717
PyDateTime_Check,
1818
datetime,
1919
import_datetime,
2020
time,
21+
timedelta,
2122
tzinfo,
2223
)
2324

@@ -428,7 +429,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
428429

429430
value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
430431
obj.dts = dts
431-
obj.tzinfo = pytz.FixedOffset(tzoffset)
432+
obj.tzinfo = timezone(timedelta(minutes=tzoffset))
432433
obj.value = tz_localize_to_utc_single(value, obj.tzinfo)
433434
if tz is None:
434435
check_overflows(obj, NPY_FR_ns)

pandas/_libs/tslibs/strptime.pyx

+12-9
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
"""Strptime-related classes and functions.
22
"""
3+
from datetime import timezone
4+
35
from cpython.datetime cimport (
46
PyDate_Check,
57
PyDateTime_Check,
68
date,
79
import_datetime,
10+
timedelta,
811
tzinfo,
912
)
1013

@@ -96,7 +99,7 @@ def array_strptime(
9699
int week_of_year, week_of_year_start, parse_code, ordinal
97100
int iso_week, iso_year
98101
int64_t us, ns
99-
object val, group_key, ampm, found, timezone
102+
object val, group_key, ampm, found, tz
100103
bint is_raise = errors=="raise"
101104
bint is_ignore = errors=="ignore"
102105
bint is_coerce = errors=="coerce"
@@ -214,7 +217,7 @@ def array_strptime(
214217
year = 1900
215218
month = day = 1
216219
hour = minute = second = ns = us = 0
217-
timezone = None
220+
tz = None
218221
# Default to -1 to signify that values not known; not critical to have,
219222
# though
220223
iso_week = week_of_year = -1
@@ -304,9 +307,9 @@ def array_strptime(
304307
# W starts week on Monday.
305308
week_of_year_start = 0
306309
elif parse_code == 17:
307-
timezone = pytz.timezone(found_dict["Z"])
310+
tz = pytz.timezone(found_dict["Z"])
308311
elif parse_code == 19:
309-
timezone = parse_timezone_directive(found_dict["z"])
312+
tz = parse_timezone_directive(found_dict["z"])
310313
elif parse_code == 20:
311314
iso_year = int(found_dict["G"])
312315
elif parse_code == 21:
@@ -388,7 +391,7 @@ def array_strptime(
388391
continue
389392
raise
390393

391-
result_timezone[i] = timezone
394+
result_timezone[i] = tz
392395

393396
return result, result_timezone.base
394397

@@ -538,15 +541,15 @@ cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday)
538541

539542
cdef tzinfo parse_timezone_directive(str z):
540543
"""
541-
Parse the '%z' directive and return a pytz.FixedOffset
544+
Parse the '%z' directive and return a datetime.timezone object.
542545
543546
Parameters
544547
----------
545548
z : string of the UTC offset
546549
547550
Returns
548551
-------
549-
pytz.FixedOffset
552+
datetime.timezone
550553
551554
Notes
552555
-----
@@ -560,7 +563,7 @@ cdef tzinfo parse_timezone_directive(str z):
560563
object gmtoff_remainder, gmtoff_remainder_padding
561564

562565
if z == "Z":
563-
return pytz.FixedOffset(0)
566+
return timezone(timedelta(0))
564567
if z[3] == ":":
565568
z = z[:3] + z[4:]
566569
if len(z) > 5:
@@ -580,4 +583,4 @@ cdef tzinfo parse_timezone_directive(str z):
580583
total_minutes = ((hours * 60) + minutes + (seconds // 60) +
581584
(microseconds // 60_000_000))
582585
total_minutes = -total_minutes if z.startswith("-") else total_minutes
583-
return pytz.FixedOffset(total_minutes)
586+
return timezone(timedelta(minutes=total_minutes))

pandas/_libs/tslibs/timestamps.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ from pandas._libs.tslibs.timezones cimport (
111111
is_utc,
112112
maybe_get_tz,
113113
treat_tz_as_pytz,
114-
utc_pytz as UTC,
114+
utc_stdlib as UTC,
115115
)
116116
from pandas._libs.tslibs.tzconversion cimport (
117117
tz_convert_from_utc_single,

pandas/_libs/tslibs/timezones.pxd

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ from cpython.datetime cimport (
55
)
66

77

8-
cdef tzinfo utc_pytz
98
cdef tzinfo utc_stdlib
109

1110
cpdef bint is_utc(tzinfo tz)

pandas/_libs/tslibs/timezones.pyx

+5-7
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,10 @@ from dateutil.tz import (
2727
tzlocal as _dateutil_tzlocal,
2828
tzutc as _dateutil_tzutc,
2929
)
30+
import numpy as np
3031
import pytz
3132
from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
3233

33-
UTC = pytz.utc
34-
35-
36-
import numpy as np
37-
3834
cimport numpy as cnp
3935
from numpy cimport int64_t
4036

@@ -49,7 +45,7 @@ from pandas._libs.tslibs.util cimport (
4945

5046
cdef int64_t NPY_NAT = get_nat()
5147
cdef tzinfo utc_stdlib = timezone.utc
52-
cdef tzinfo utc_pytz = UTC
48+
cdef tzinfo utc_pytz = pytz.utc
5349
cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc()
5450

5551
cdef tzinfo utc_zoneinfo = None
@@ -168,10 +164,12 @@ cpdef inline tzinfo maybe_get_tz(object tz):
168164
hours = int(tz[3:6])
169165
minutes = int(tz[3] + tz[7:9])
170166
tz = timezone(timedelta(hours=hours, minutes=minutes))
167+
elif tz == "UTC" or tz == "utc":
168+
tz = utc_stdlib
171169
else:
172170
tz = pytz.timezone(tz)
173171
elif is_integer_object(tz):
174-
tz = pytz.FixedOffset(tz / 60)
172+
tz = timezone(timedelta(seconds=tz))
175173
elif isinstance(tz, tzinfo):
176174
pass
177175
elif tz is None:

pandas/_libs/tslibs/tzconversion.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ cdef datetime _astimezone(npy_datetimestruct dts, tzinfo tz):
738738
Optimized equivalent to:
739739
740740
dt = datetime(dts.year, dts.month, dts.day, dts.hour,
741-
dts.min, dts.sec, dts.us, utc_pytz)
741+
dts.min, dts.sec, dts.us, utc_stdlib)
742742
dt = dt.astimezone(tz)
743743
744744
Derived from the datetime.astimezone implementation at

pandas/core/tools/datetimes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def to_datetime(
991991
992992
>>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500'])
993993
DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'],
994-
dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None)
994+
dtype='datetime64[ns, UTC-05:00]', freq=None)
995995
996996
- However, timezone-aware inputs *with mixed time offsets* (for example
997997
issued from a timezone with daylight savings, such as Europe/Paris)
@@ -1010,7 +1010,7 @@ def to_datetime(
10101010
>>> from datetime import datetime
10111011
>>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
10121012
DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
1013-
dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
1013+
dtype='datetime64[ns, UTC-01:00]', freq=None)
10141014
10151015
|
10161016

pandas/io/json/_table_schema.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import warnings
1414

1515
from pandas._libs.json import loads
16+
from pandas._libs.tslibs import timezones
1617
from pandas._typing import (
1718
DtypeObj,
1819
JSONSerializable,
@@ -140,7 +141,11 @@ def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]:
140141
elif is_period_dtype(dtype):
141142
field["freq"] = dtype.freq.freqstr
142143
elif is_datetime64tz_dtype(dtype):
143-
field["tz"] = dtype.tz.zone
144+
if timezones.is_utc(dtype.tz):
145+
# timezone.utc has no "zone" attr
146+
field["tz"] = "UTC"
147+
else:
148+
field["tz"] = dtype.tz.zone
144149
elif is_extension_array_dtype(dtype):
145150
field["extDtype"] = dtype.name
146151
return field

pandas/tests/frame/methods/test_align.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
from datetime import timezone
2+
13
import numpy as np
24
import pytest
3-
import pytz
45

56
import pandas as pd
67
from pandas import (
@@ -27,17 +28,17 @@ def test_frame_align_aware(self):
2728
# frame with frame
2829
df1_central = df1.tz_convert("US/Central")
2930
new1, new2 = df1.align(df1_central)
30-
assert new1.index.tz == pytz.UTC
31-
assert new2.index.tz == pytz.UTC
31+
assert new1.index.tz is timezone.utc
32+
assert new2.index.tz is timezone.utc
3233

3334
# frame with Series
3435
new1, new2 = df1.align(df1_central[0], axis=0)
35-
assert new1.index.tz == pytz.UTC
36-
assert new2.index.tz == pytz.UTC
36+
assert new1.index.tz is timezone.utc
37+
assert new2.index.tz is timezone.utc
3738

3839
df1[0].align(df1_central, axis=0)
39-
assert new1.index.tz == pytz.UTC
40-
assert new2.index.tz == pytz.UTC
40+
assert new1.index.tz is timezone.utc
41+
assert new2.index.tz is timezone.utc
4142

4243
def test_align_float(self, float_frame):
4344
af, bf = float_frame.align(float_frame)

pandas/tests/frame/methods/test_tz_localize.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from datetime import timezone
2+
13
import numpy as np
24
import pytest
35

@@ -23,7 +25,7 @@ def test_tz_localize(self, frame_or_series):
2325
expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
2426
expected = tm.get_obj(expected, frame_or_series)
2527

26-
assert result.index.tz.zone == "UTC"
28+
assert result.index.tz is timezone.utc
2729
tm.assert_equal(result, expected)
2830

2931
def test_tz_localize_axis1(self):
@@ -33,7 +35,7 @@ def test_tz_localize_axis1(self):
3335

3436
df = df.T
3537
result = df.tz_localize("utc", axis=1)
36-
assert result.columns.tz.zone == "UTC"
38+
assert result.columns.tz is timezone.utc
3739

3840
expected = DataFrame({"a": 1}, rng.tz_localize("UTC"))
3941

0 commit comments

Comments
 (0)