Skip to content

Commit 1cf98aa

Browse files
authored
REF: Prefer testing and documenting zoneinfo instead of pytz (#59016)
* REF: Prefer testing and documenting zoneinfo instead of pytz * Fix tests * Remove bad test case, fix bad attribute
1 parent bd7ece0 commit 1cf98aa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+504
-430
lines changed

asv_bench/benchmarks/tslibs/timestamp.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
from datetime import datetime
1+
from datetime import (
2+
datetime,
3+
timezone,
4+
)
5+
import zoneinfo
26

37
import numpy as np
4-
import pytz
58

69
from pandas import Timestamp
710

@@ -12,7 +15,7 @@ class TimestampConstruction:
1215
def setup(self):
1316
self.npdatetime64 = np.datetime64("2020-01-01 00:00:00")
1417
self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0)
15-
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
18+
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, timezone.utc)
1619
self.ts = Timestamp("2020-01-01 00:00:00")
1720

1821
def time_parse_iso8601_no_tz(self):
@@ -113,7 +116,7 @@ def setup(self, tz):
113116
self.ts = Timestamp("2017-08-25 08:16:14", tz=tz)
114117

115118
def time_replace_tz(self, tz):
116-
self.ts.replace(tzinfo=pytz.timezone("US/Eastern"))
119+
self.ts.replace(tzinfo=zoneinfo.ZoneInfo("US/Eastern"))
117120

118121
def time_replace_None(self, tz):
119122
self.ts.replace(tzinfo=None)
@@ -144,8 +147,8 @@ def time_ceil(self, tz):
144147

145148
class TimestampAcrossDst:
146149
def setup(self):
147-
dt = datetime(2016, 3, 27, 1)
148-
self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo
150+
dt = datetime(2016, 3, 27, 1, fold=0)
151+
self.tzinfo = dt.astimezone(zoneinfo.ZoneInfo("Europe/Berlin")).tzinfo
149152
self.ts2 = Timestamp(dt)
150153

151154
def time_replace_across_dst(self):

asv_bench/benchmarks/tslibs/tslib.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020
timedelta,
2121
timezone,
2222
)
23+
import zoneinfo
2324

2425
from dateutil.tz import (
2526
gettz,
2627
tzlocal,
2728
)
2829
import numpy as np
29-
import pytz
3030

3131
try:
3232
from pandas._libs.tslibs import ints_to_pydatetime
@@ -38,7 +38,7 @@
3838
None,
3939
timezone.utc,
4040
timezone(timedelta(minutes=60)),
41-
pytz.timezone("US/Pacific"),
41+
zoneinfo.ZoneInfo("US/Pacific"),
4242
gettz("Asia/Tokyo"),
4343
tzlocal_obj,
4444
]

asv_bench/benchmarks/tslibs/tz_convert.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
from datetime import timezone
2+
13
import numpy as np
2-
from pytz import UTC
34

45
from pandas._libs.tslibs.tzconversion import tz_localize_to_utc
56

@@ -41,7 +42,7 @@ def time_tz_convert_from_utc(self, size, tz):
4142
# dti = DatetimeIndex(self.i8data, tz=tz)
4243
# dti.tz_localize(None)
4344
if old_sig:
44-
tz_convert_from_utc(self.i8data, UTC, tz)
45+
tz_convert_from_utc(self.i8data, timezone.utc, tz)
4546
else:
4647
tz_convert_from_utc(self.i8data, tz)
4748

doc/source/user_guide/io.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -4990,7 +4990,7 @@ Caveats
49904990
convenience you can use ``store.flush(fsync=True)`` to do this for you.
49914991
* Once a ``table`` is created columns (DataFrame)
49924992
are fixed; only exactly the same columns can be appended
4993-
* Be aware that timezones (e.g., ``pytz.timezone('US/Eastern')``)
4993+
* Be aware that timezones (e.g., ``zoneinfo.ZoneInfo('US/Eastern')``)
49944994
are not necessarily equal across timezone versions. So if data is
49954995
localized to a specific timezone in the HDFStore using one version
49964996
of a timezone library and that data is updated with another version, the data
@@ -5169,6 +5169,8 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
51695169

51705170
.. ipython:: python
51715171
5172+
import pytz
5173+
51725174
df = pd.DataFrame(
51735175
{
51745176
"a": list("abc"),
@@ -5178,7 +5180,7 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
51785180
"e": [True, False, True],
51795181
"f": pd.Categorical(list("abc")),
51805182
"g": pd.date_range("20130101", periods=3),
5181-
"h": pd.date_range("20130101", periods=3, tz="US/Eastern"),
5183+
"h": pd.date_range("20130101", periods=3, tz=pytz.timezone("US/Eastern")),
51825184
"i": pd.date_range("20130101", periods=3, freq="ns"),
51835185
}
51845186
)

doc/source/user_guide/timeseries.rst

+9-7
Original file line numberDiff line numberDiff line change
@@ -2337,7 +2337,7 @@ Time zone handling
23372337
------------------
23382338

23392339
pandas provides rich support for working with timestamps in different time
2340-
zones using the ``pytz`` and ``dateutil`` libraries or :class:`datetime.timezone`
2340+
zones using the ``zoneinfo``, ``pytz`` and ``dateutil`` libraries or :class:`datetime.timezone`
23412341
objects from the standard library.
23422342

23432343

@@ -2354,14 +2354,14 @@ By default, pandas objects are time zone unaware:
23542354
To localize these dates to a time zone (assign a particular time zone to a naive date),
23552355
you can use the ``tz_localize`` method or the ``tz`` keyword argument in
23562356
:func:`date_range`, :class:`Timestamp`, or :class:`DatetimeIndex`.
2357-
You can either pass ``pytz`` or ``dateutil`` time zone objects or Olson time zone database strings.
2357+
You can either pass ``zoneinfo``, ``pytz`` or ``dateutil`` time zone objects or Olson time zone database strings.
23582358
Olson time zone strings will return ``pytz`` time zone objects by default.
23592359
To return ``dateutil`` time zone objects, append ``dateutil/`` before the string.
23602360

2361-
* In ``pytz`` you can find a list of common (and less common) time zones using
2362-
``from pytz import common_timezones, all_timezones``.
2361+
* For ``zoneinfo``, the set of available time zones can be obtained from :py:func:`zoneinfo.available_timezones`.
2362+
* In ``pytz`` you can find a list of all time zones using ``pytz.all_timezones`` (or only the commonly used ones using ``pytz.common_timezones``).
23632363
* ``dateutil`` uses the OS time zones so there isn't a fixed list available. For
2364-
common zones, the names are the same as ``pytz``.
2364+
common zones, the names are the same as ``pytz`` and ``zoneinfo``.
23652365

23662366
.. ipython:: python
23672367
@@ -2466,7 +2466,7 @@ you can use the ``tz_convert`` method.
24662466

24672467
.. warning::
24682468

2469-
If you are using dates beyond 2038-01-18, due to current deficiencies
2469+
If you are using dates beyond 2038-01-18 with ``pytz``, due to current deficiencies
24702470
in the underlying libraries caused by the year 2038 problem, daylight saving time (DST) adjustments
24712471
to timezone aware dates will not be applied. If and when the underlying libraries are fixed,
24722472
the DST transitions will be applied.
@@ -2475,9 +2475,11 @@ you can use the ``tz_convert`` method.
24752475

24762476
.. ipython:: python
24772477
2478+
import pytz
2479+
24782480
d_2037 = "2037-03-31T010101"
24792481
d_2038 = "2038-03-31T010101"
2480-
DST = "Europe/London"
2482+
DST = pytz.timezone("Europe/London")
24812483
assert pd.Timestamp(d_2037, tz=DST) != pd.Timestamp(d_2037, tz="GMT")
24822484
assert pd.Timestamp(d_2038, tz=DST) == pd.Timestamp(d_2038, tz="GMT")
24832485

pandas/_libs/tslibs/nattype.pyx

+7-7
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ class NaTType(_NaT):
841841
842842
Parameters
843843
----------
844-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
844+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
845845
Time zone for time which Timestamp will be converted to.
846846
None will remove timezone holding UTC time.
847847
@@ -894,7 +894,7 @@ class NaTType(_NaT):
894894
----------
895895
ordinal : int
896896
Date corresponding to a proleptic Gregorian ordinal.
897-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
897+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
898898
Time zone for the Timestamp.
899899
900900
Notes
@@ -1307,7 +1307,7 @@ timedelta}, default 'raise'
13071307
13081308
Parameters
13091309
----------
1310-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
1310+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
13111311
Time zone for time which Timestamp will be converted to.
13121312
None will remove timezone holding UTC time.
13131313
@@ -1361,7 +1361,7 @@ timedelta}, default 'raise'
13611361
13621362
Parameters
13631363
----------
1364-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
1364+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
13651365
Time zone for time which Timestamp will be converted to.
13661366
None will remove timezone holding local time.
13671367
@@ -1461,13 +1461,13 @@ default 'raise'
14611461
14621462
Replace timezone (not a conversion):
14631463
1464-
>>> import pytz
1465-
>>> ts.replace(tzinfo=pytz.timezone('US/Pacific'))
1464+
>>> import zoneinfo
1465+
>>> ts.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
14661466
Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific')
14671467
14681468
Analogous for ``pd.NaT``:
14691469
1470-
>>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific'))
1470+
>>> pd.NaT.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
14711471
NaT
14721472
""",
14731473
)

pandas/_libs/tslibs/timestamps.pyx

+7-7
Original file line numberDiff line numberDiff line change
@@ -1374,7 +1374,7 @@ class Timestamp(_Timestamp):
13741374
Timezone info.
13751375
nanosecond : int, optional, default 0
13761376
Value of nanosecond.
1377-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
1377+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
13781378
Time zone for time which Timestamp will have.
13791379
unit : str
13801380
Unit used for conversion if ts_input is of type int or float. The
@@ -1446,7 +1446,7 @@ class Timestamp(_Timestamp):
14461446
----------
14471447
ordinal : int
14481448
Date corresponding to a proleptic Gregorian ordinal.
1449-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
1449+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
14501450
Time zone for the Timestamp.
14511451
14521452
Notes
@@ -2393,7 +2393,7 @@ timedelta}, default 'raise'
23932393
23942394
Parameters
23952395
----------
2396-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
2396+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
23972397
Time zone for time which Timestamp will be converted to.
23982398
None will remove timezone holding local time.
23992399
@@ -2500,7 +2500,7 @@ default 'raise'
25002500
25012501
Parameters
25022502
----------
2503-
tz : str, pytz.timezone, dateutil.tz.tzfile or None
2503+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None
25042504
Time zone for time which Timestamp will be converted to.
25052505
None will remove timezone holding UTC time.
25062506
@@ -2604,13 +2604,13 @@ default 'raise'
26042604
26052605
Replace timezone (not a conversion):
26062606
2607-
>>> import pytz
2608-
>>> ts.replace(tzinfo=pytz.timezone('US/Pacific'))
2607+
>>> import zoneinfo
2608+
>>> ts.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
26092609
Timestamp('2020-03-14 15:32:52.192548651-0700', tz='US/Pacific')
26102610
26112611
Analogous for ``pd.NaT``:
26122612
2613-
>>> pd.NaT.replace(tzinfo=pytz.timezone('US/Pacific'))
2613+
>>> pd.NaT.replace(tzinfo=zoneinfo.ZoneInfo('US/Pacific'))
26142614
NaT
26152615
"""
26162616

pandas/_libs/tslibs/timezones.pyx

+19-20
Original file line numberDiff line numberDiff line change
@@ -119,27 +119,26 @@ cpdef inline object get_timezone(tzinfo tz):
119119
raise TypeError("tz argument cannot be None")
120120
if is_utc(tz):
121121
return tz
122+
elif is_zoneinfo(tz):
123+
return tz.key
124+
elif treat_tz_as_pytz(tz):
125+
zone = tz.zone
126+
if zone is None:
127+
return tz
128+
return zone
129+
elif treat_tz_as_dateutil(tz):
130+
if ".tar.gz" in tz._filename:
131+
raise ValueError(
132+
"Bad tz filename. Dateutil on python 3 on windows has a "
133+
"bug which causes tzfile._filename to be the same for all "
134+
"timezone files. Please construct dateutil timezones "
135+
'implicitly by passing a string like "dateutil/Europe'
136+
'/London" when you construct your pandas objects instead '
137+
"of passing a timezone object. See "
138+
"https://github.com/pandas-dev/pandas/pull/7362")
139+
return "dateutil/" + tz._filename
122140
else:
123-
if treat_tz_as_dateutil(tz):
124-
if ".tar.gz" in tz._filename:
125-
raise ValueError(
126-
"Bad tz filename. Dateutil on python 3 on windows has a "
127-
"bug which causes tzfile._filename to be the same for all "
128-
"timezone files. Please construct dateutil timezones "
129-
'implicitly by passing a string like "dateutil/Europe'
130-
'/London" when you construct your pandas objects instead '
131-
"of passing a timezone object. See "
132-
"https://github.com/pandas-dev/pandas/pull/7362")
133-
return "dateutil/" + tz._filename
134-
else:
135-
# tz is a pytz timezone or unknown.
136-
try:
137-
zone = tz.zone
138-
if zone is None:
139-
return tz
140-
return zone
141-
except AttributeError:
142-
return tz
141+
return tz
143142

144143

145144
cpdef inline tzinfo maybe_get_tz(object tz):

pandas/_testing/_hypothesis.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
from hypothesis import strategies as st
88
from hypothesis.extra.dateutil import timezones as dateutil_timezones
9-
from hypothesis.extra.pytz import timezones as pytz_timezones
109

1110
from pandas.compat import is_platform_windows
1211

@@ -57,7 +56,7 @@
5756
DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
5857
min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), # pyright: ignore[reportArgumentType]
5958
max_value=pd.Timestamp(1900, 1, 1).to_pydatetime(), # pyright: ignore[reportArgumentType]
60-
timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
59+
timezones=st.one_of(st.none(), dateutil_timezones(), st.timezones()),
6160
)
6261

6362
DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(

pandas/core/arrays/datetimes.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ def tz(self) -> tzinfo | None:
594594
595595
Returns
596596
-------
597-
datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
597+
zoneinfo.ZoneInfo, datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
598598
Returns None when the array is tz-naive.
599599
600600
See Also
@@ -624,7 +624,7 @@ def tz(self) -> tzinfo | None:
624624
... )
625625
>>> idx.tz
626626
datetime.timezone.utc
627-
"""
627+
""" # noqa: E501
628628
# GH 18595
629629
return getattr(self.dtype, "tz", None)
630630

@@ -863,7 +863,7 @@ def tz_convert(self, tz) -> Self:
863863
864864
Parameters
865865
----------
866-
tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
866+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
867867
Time zone for time. Corresponding timestamps would be converted
868868
to this time zone of the Datetime Array/Index. A `tz` of None will
869869
convert to UTC and remove the timezone information.
@@ -923,7 +923,7 @@ def tz_convert(self, tz) -> Self:
923923
'2014-08-01 08:00:00',
924924
'2014-08-01 09:00:00'],
925925
dtype='datetime64[ns]', freq='h')
926-
"""
926+
""" # noqa: E501
927927
tz = timezones.maybe_get_tz(tz)
928928

929929
if self.tz is None:
@@ -955,7 +955,7 @@ def tz_localize(
955955
956956
Parameters
957957
----------
958-
tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
958+
tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
959959
Time zone to convert timestamps to. Passing ``None`` will
960960
remove the time zone information preserving local time.
961961
ambiguous : 'infer', 'NaT', bool array, default 'raise'
@@ -1081,7 +1081,7 @@ def tz_localize(
10811081
0 2015-03-29 03:30:00+02:00
10821082
1 2015-03-29 03:30:00+02:00
10831083
dtype: datetime64[ns, Europe/Warsaw]
1084-
"""
1084+
""" # noqa: E501
10851085
nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
10861086
if nonexistent not in nonexistent_options and not isinstance(
10871087
nonexistent, timedelta

pandas/core/indexes/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
147147
One of pandas date offset strings or corresponding objects. The string
148148
'infer' can be passed in order to set the frequency of the index as the
149149
inferred frequency upon creation.
150-
tz : pytz.timezone or dateutil.tz.tzfile or datetime.tzinfo or str
150+
tz : zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or str
151151
Set the Timezone of the data.
152152
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
153153
When clocks moved backward due to DST, ambiguous times may arise.

0 commit comments

Comments
 (0)