From c061d3bb27d170394c66c8a620c47b8f71673e76 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Nov 2022 16:35:04 -0800 Subject: [PATCH 1/5] API: default to stdlib timezone objects for fixed-offsets --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_libs/tslib.pyx | 14 ++++--- pandas/_libs/tslibs/conversion.pyx | 7 ++-- pandas/_libs/tslibs/strptime.pyx | 11 ++++-- pandas/_libs/tslibs/timezones.pyx | 2 +- .../indexes/datetimes/test_constructors.py | 10 ++--- pandas/tests/io/parser/test_parse_dates.py | 10 ++++- .../tests/scalar/timestamp/test_rendering.py | 6 +-- .../tests/scalar/timestamp/test_timestamp.py | 10 ++--- pandas/tests/tools/test_to_datetime.py | 39 ++++++++++++------- pandas/tests/tslibs/test_array_to_datetime.py | 7 ++-- 11 files changed, 70 insertions(+), 47 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 032bcf09244e5..425c2424976f7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -325,6 +325,7 @@ Other API changes ^^^^^^^^^^^^^^^^^ - The ``freq``, ``tz``, ``nanosecond``, and ``unit`` keywords in the :class:`Timestamp` constructor are now keyword-only (:issue:`45307`) - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) +- When inferring a fixed-offset ``tzinfo`` from a string or integer ``tz``, a standard-library ``datetime.timezone`` object is returned instead of a ``pytz`` object (:issue:`34916`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. 
- Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d7c4c022a2556..68183190c9c00 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,9 +1,13 @@ cimport cython + +from datetime import timezone + from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, datetime, import_datetime, + timedelta, tzinfo, ) from cpython.object cimport PyObject @@ -23,8 +27,6 @@ import numpy as np cnp.import_array() -import pytz - from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, @@ -94,7 +96,7 @@ def _test_parse_iso8601(ts: str): obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts) check_dts_bounds(&obj.dts) if out_local == 1: - obj.tzinfo = pytz.FixedOffset(out_tzoffset) + obj.tzinfo = timezone(timedelta(minutes=out_tzoffset)) obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo) return Timestamp(obj.value, tz=obj.tzinfo) else: @@ -454,7 +456,7 @@ cpdef array_to_datetime( 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError is encountered - Also returns a pytz.FixedOffset if an array of strings with the same + Also returns a fixed-offset tzinfo object if an array of strings with the same timezone offset is passed and utc=True is not passed. Otherwise, None is returned @@ -655,7 +657,7 @@ cpdef array_to_datetime( # since we store the total_seconds of # dateutil.tz.tzoffset objects out_tzoffset_vals.add(out_tzoffset * 60.) 
- tz = pytz.FixedOffset(out_tzoffset) + tz = timezone(timedelta(minutes=out_tzoffset)) value = tz_localize_to_utc_single(value, tz) out_local = 0 out_tzoffset = 0 @@ -729,7 +731,7 @@ cpdef array_to_datetime( return _array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() - tz_out = pytz.FixedOffset(tz_offset / 60.) + tz_out = timezone(timedelta(seconds=tz_offset)) return result, tz_out diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 923dfa3c54d26..22c593d2f9685 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -8,16 +8,17 @@ from numpy cimport ( cnp.import_array() -import pytz - # stdlib datetime imports +from datetime import timezone + from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, datetime, import_datetime, time, + timedelta, tzinfo, ) @@ -407,7 +408,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts) obj.dts = dts - obj.tzinfo = pytz.FixedOffset(tzoffset) + obj.tzinfo = timezone(timedelta(minutes=tzoffset)) obj.value = tz_localize_to_utc_single(value, obj.tzinfo) if tz is None: check_overflows(obj, NPY_FR_ns) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index f540ad19c48d2..c53a2db702be7 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,7 +1,10 @@ """Strptime-related classes and functions. """ +from datetime import timezone + from cpython.datetime cimport ( date, + timedelta, tzinfo, ) @@ -488,7 +491,7 @@ cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday) cdef tzinfo parse_timezone_directive(str z): """ - Parse the '%z' directive and return a pytz.FixedOffset + Parse the '%z' directive and return a datetime.timezone object. 
Parameters ---------- @@ -496,7 +499,7 @@ cdef tzinfo parse_timezone_directive(str z): Returns ------- - pytz.FixedOffset + datetime.timezone Notes ----- @@ -510,7 +513,7 @@ cdef tzinfo parse_timezone_directive(str z): object gmtoff_remainder, gmtoff_remainder_padding if z == 'Z': - return pytz.FixedOffset(0) + return timezone(timedelta(0)) if z[3] == ':': z = z[:3] + z[4:] if len(z) > 5: @@ -530,4 +533,4 @@ cdef tzinfo parse_timezone_directive(str z): total_minutes = ((hours * 60) + minutes + (seconds // 60) + (microseconds // 60_000_000)) total_minutes = -total_minutes if z.startswith("-") else total_minutes - return pytz.FixedOffset(total_minutes) + return timezone(timedelta(minutes=total_minutes)) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index abf8bbc5ca5b9..5e566db43d036 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -171,7 +171,7 @@ cpdef inline tzinfo maybe_get_tz(object tz): else: tz = pytz.timezone(tz) elif is_integer_object(tz): - tz = pytz.FixedOffset(tz / 60) + tz = timezone(timedelta(seconds=tz)) elif isinstance(tz, tzinfo): pass elif tz is None: diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 4aaa2b694102d..5f38480129636 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -627,7 +627,7 @@ def test_constructor_coverage(self): @pytest.mark.parametrize("freq", ["AS", "W-SUN"]) def test_constructor_datetime64_tzformat(self, freq): - # see GH#6572: ISO 8601 format results in pytz.FixedOffset + # see GH#6572: ISO 8601 format results in stdlib timezone object idx = date_range( "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq ) @@ -635,7 +635,7 @@ def test_constructor_datetime64_tzformat(self, freq): "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, - tz=pytz.FixedOffset(-300), + 
tz=timezone(timedelta(minutes=-300)), ) tm.assert_index_equal(idx, expected) # Unable to use `US/Eastern` because of DST @@ -651,7 +651,7 @@ def test_constructor_datetime64_tzformat(self, freq): "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, - tz=pytz.FixedOffset(540), + tz=timezone(timedelta(minutes=540)), ) tm.assert_index_equal(idx, expected) expected_i8 = date_range( @@ -665,7 +665,7 @@ def test_constructor_datetime64_tzformat(self, freq): "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, - tz=pytz.FixedOffset(-300), + tz=timezone(timedelta(minutes=-300)), ) tm.assert_index_equal(idx, expected) # Unable to use `US/Eastern` because of DST @@ -679,7 +679,7 @@ def test_constructor_datetime64_tzformat(self, freq): "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, - tz=pytz.FixedOffset(540), + tz=timezone(timedelta(minutes=540)), ) tm.assert_index_equal(idx, expected) expected_i8 = date_range( diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1a8149ae41fcb..4b53cea4a1e33 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -6,6 +6,8 @@ from datetime import ( date, datetime, + timedelta, + timezone, ) from io import StringIO import warnings @@ -935,7 +937,11 @@ def test_parse_tz_aware(all_parsers, request): {"x": [0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date") ) tm.assert_frame_equal(result, expected) - assert result.index.tz is pytz.utc + if parser.engine == "pyarrow": + expected_tz = pytz.utc + else: + expected_tz = timezone.utc + assert result.index.tz is expected_tz @xfail_pyarrow @@ -1563,7 +1569,7 @@ def test_parse_timezone(all_parsers): start="2018-01-04 09:01:00", end="2018-01-04 09:05:00", freq="1min", - tz=pytz.FixedOffset(540), + tz=timezone(timedelta(minutes=540)), ) ), freq=None, diff --git a/pandas/tests/scalar/timestamp/test_rendering.py b/pandas/tests/scalar/timestamp/test_rendering.py index 
3998142e568fe..c2886f8f285f3 100644 --- a/pandas/tests/scalar/timestamp/test_rendering.py +++ b/pandas/tests/scalar/timestamp/test_rendering.py @@ -41,10 +41,8 @@ def test_repr_utcoffset(self): date_with_utc_offset = Timestamp("2014-03-13 00:00:00-0400", tz=None) assert "2014-03-13 00:00:00-0400" in repr(date_with_utc_offset) assert "tzoffset" not in repr(date_with_utc_offset) - assert "pytz.FixedOffset(-240)" in repr(date_with_utc_offset) - expr = repr(date_with_utc_offset).replace( - "'pytz.FixedOffset(-240)'", "pytz.FixedOffset(-240)" - ) + assert "UTC-04:00" in repr(date_with_utc_offset) + expr = repr(date_with_utc_offset) assert date_with_utc_offset == eval(expr) def test_timestamp_repr_pre1900(self): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index f5b9a35a53a24..3effa6c726722 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -4,6 +4,7 @@ from datetime import ( datetime, timedelta, + timezone, ) import locale import unicodedata @@ -12,10 +13,7 @@ import numpy as np import pytest import pytz -from pytz import ( - timezone, - utc, -) +from pytz import utc from pandas._libs.tslibs.dtypes import NpyDatetimeUnit from pandas._libs.tslibs.timezones import ( @@ -233,7 +231,7 @@ def test_tz(self): assert conv.hour == 19 def test_utc_z_designator(self): - assert get_timezone(Timestamp("2014-11-02 01:00Z").tzinfo) is utc + assert get_timezone(Timestamp("2014-11-02 01:00Z").tzinfo) is timezone.utc def test_asm8(self): np.random.seed(7_960_929) @@ -251,7 +249,7 @@ def compare(x, y): assert int((Timestamp(x).value - Timestamp(y).value) / 1e9) == 0 compare(Timestamp.now(), datetime.now()) - compare(Timestamp.now("UTC"), datetime.now(timezone("UTC"))) + compare(Timestamp.now("UTC"), datetime.now(pytz.timezone("UTC"))) compare(Timestamp.utcnow(), datetime.utcnow()) compare(Timestamp.today(), datetime.today()) current_time = 
calendar.timegm(datetime.now().utctimetuple()) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index c3b4159c2cbfc..823b6a75d9e06 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -387,19 +387,33 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache): [ "%Y-%m-%d %H:%M:%S%z", ["2010-01-01 12:00:00+0100"] * 2, - [Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + [ + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60)) + ) + ] + * 2, ], [ "%Y-%m-%d %H:%M:%S %z", ["2010-01-01 12:00:00 +0100"] * 2, - [Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + [ + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60)) + ) + ] + * 2, ], [ "%Y-%m-%d %H:%M:%S %z", ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"], [ - Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60)), - Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(-60)), + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60)) + ), + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60)) + ), ], ], [ @@ -466,7 +480,7 @@ def test_to_datetime_mixed_datetime_and_string(self): d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1))) d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1))) res = to_datetime(["2020-01-01 17:00 -0100", d2]) - expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60)) + expected = to_datetime([d1, d2]).tz_convert(timezone(timedelta(minutes=-60))) tm.assert_index_equal(res, expected) @pytest.mark.parametrize("infer_datetime_format", [True, False]) @@ -1274,7 +1288,7 @@ def test_mixed_offsets_with_native_datetime_raises(self): def test_non_iso_strings_with_tz_offset(self): result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) expected = DatetimeIndex( - [datetime(2018, 3, 1, 12, tzinfo=pytz.FixedOffset(240))] * 
2 + [datetime(2018, 3, 1, 12, tzinfo=timezone(timedelta(minutes=240)))] * 2 ) tm.assert_index_equal(result, expected) @@ -2196,9 +2210,8 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset): # GH 33133 ser = Series([f"2019-02-02 08:07:13 {tz_name}"]) result = to_datetime(ser, infer_datetime_format=True) - expected = Series( - [Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))] - ) + tz = timezone(timedelta(minutes=offset)) + expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)]) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -2483,13 +2496,13 @@ def test_parsers_timestring(self, date_str, exp_def): [ ( "2013-01-01 05:45+0545", - pytz.FixedOffset(345), - "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')", + timezone(timedelta(minutes=345)), + "Timestamp('2013-01-01 05:45:00+0545', tz='UTC+05:45')", ), ( "2013-01-01 05:30+0530", - pytz.FixedOffset(330), - "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')", + timezone(timedelta(minutes=330)), + "Timestamp('2013-01-01 05:30:00+0530', tz='UTC+05:30')", ), ], ) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 64a45f6507810..80aa5d7fb1c19 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -1,12 +1,13 @@ from datetime import ( date, datetime, + timedelta, + timezone, ) from dateutil.tz.tz import tzoffset import numpy as np import pytest -import pytz from pandas._libs import ( iNaT, @@ -63,7 +64,7 @@ def test_parsing_timezone_offsets(dt_string, expected_tz): result, result_tz = tslib.array_to_datetime(arr) tm.assert_numpy_array_equal(result, expected) - assert result_tz is pytz.FixedOffset(expected_tz) + assert result_tz == timezone(timedelta(minutes=expected_tz)) def test_parsing_non_iso_timezone_offset(): @@ -74,7 +75,7 @@ def test_parsing_non_iso_timezone_offset(): expected = 
np.array([np.datetime64("2013-01-01 00:00:00.000000000")]) tm.assert_numpy_array_equal(result, expected) - assert result_tz is pytz.FixedOffset(0) + assert result_tz is timezone.utc def test_parsing_different_timezone_offsets(): From e1ec130190fa89f9e2b938cff4d16f1ab1e3cb3e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Nov 2022 14:54:12 -0800 Subject: [PATCH 2/5] update docstrings --- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 8 ++------ pandas/core/tools/datetimes.py | 4 ++-- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index b0208f9ca3296..7ff28952dbfbb 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -111,7 +111,7 @@ from pandas._libs.tslibs.timezones cimport ( is_utc, maybe_get_tz, treat_tz_as_pytz, - utc_pytz as UTC, + utc_stdlib as UTC, ) from pandas._libs.tslibs.tzconversion cimport ( tz_convert_from_utc_single, diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index c1a4e2bd5e1ac..5629deb965a2d 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -5,7 +5,7 @@ from cpython.datetime cimport ( ) -cdef tzinfo utc_pytz +cdef tzinfo utc_stdlib cpdef bint is_utc(tzinfo tz) cdef bint is_tzlocal(tzinfo tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 5e566db43d036..98db5e63b2678 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -27,14 +27,10 @@ from dateutil.tz import ( tzlocal as _dateutil_tzlocal, tzutc as _dateutil_tzutc, ) +import numpy as np import pytz from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo -UTC = pytz.utc - - -import numpy as np - cimport numpy as cnp from numpy cimport int64_t @@ -49,7 +45,7 @@ from pandas._libs.tslibs.util cimport ( cdef int64_t NPY_NAT = get_nat() cdef tzinfo 
utc_stdlib = timezone.utc -cdef tzinfo utc_pytz = UTC +cdef tzinfo utc_pytz = pytz.utc cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc() cdef tzinfo utc_zoneinfo = None diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index bb2f663dedb33..31e910442f404 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -991,7 +991,7 @@ def to_datetime( >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500']) DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], - dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None) + dtype='datetime64[ns, UTC-05:00]', freq=None) - However, timezone-aware inputs *with mixed time offsets* (for example issued from a timezone with daylight savings, such as Europe/Paris) @@ -1010,7 +1010,7 @@ def to_datetime( >>> from datetime import datetime >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)]) DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'], - dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None) + dtype='datetime64[ns, UTC-01:00]', freq=None) | From ea6de52081a47377cf528a434530ce088ad802c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 6 Dec 2022 16:25:51 -0800 Subject: [PATCH 3/5] flesh out whatsnew --- doc/source/whatsnew/v2.0.0.rst | 35 +++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 959ed1b4644a1..5e46c65ff8b02 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -311,6 +311,40 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or ser.astype("timedelta64[s]") ser.astype("timedelta64[D]") +.. 
_whatsnew_200.api_breaking.default_to_stdlib_tzinfos: + +UTC and fixed-offset timezones default to standard-library tzinfo objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In previous versions, the default ``tzinfo`` object used to represent UTC +was ``pytz.UTC``. In pandas 2.0, we default to ``datetime.timezone.utc`` instead. +Similarly, for timezones that represent fixed UTC offsets, we use ``datetime.timezone`` +objects instead of ``pytz.FixedOffset`` objects. See (:issue:`34916`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: ts = pd.Timestamp("2016-01-01", tz="UTC") + In [3]: type(ts.tzinfo) + Out[3]: pytz.UTC + + In [4]: ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00") + In [5]: type(ts2.tzinfo) + Out[5]: pytz._FixedOffset + +*New behavior*: + +.. ipython:: python + + ts = pd.Timestamp("2016-01-01", tz="UTC") + type(ts.tzinfo) + + ts2 = pd.Timestamp("2016-01-01 04:05:06-07:00") + type(ts2.tzinfo) + +For timezones that are neither UTC nor fixed offsets, e.g. "US/Pacific", we +continue to default to ``pytz`` objects. + .. _whatsnew_200.api_breaking.deps: Increased minimum versions for dependencies @@ -350,7 +384,6 @@ Other API changes ^^^^^^^^^^^^^^^^^ - The ``freq``, ``tz``, ``nanosecond``, and ``unit`` keywords in the :class:`Timestamp` constructor are now keyword-only (:issue:`45307`) - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) -- When inferring a fixed-offset ``tzinfo`` from a string or integer ``tz``, a standard-library ``datetime.timezone`` object is returned instead of a ``pytz`` object (:issue:`34916`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. 
- Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`) From 2548727f0a3af94944ed5f05cdbf7c484d78abb5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 8 Dec 2022 13:13:33 -0800 Subject: [PATCH 4/5] handle strings --- pandas/_libs/tslibs/timezones.pyx | 2 ++ pandas/io/json/_table_schema.py | 7 ++++++- pandas/tests/frame/methods/test_align.py | 15 ++++++++------- pandas/tests/frame/methods/test_tz_localize.py | 6 ++++-- pandas/tests/frame/test_arithmetic.py | 10 ++++++---- pandas/tests/indexes/datetimes/test_join.py | 7 +++++-- pandas/tests/indexes/datetimes/test_timezones.py | 11 ++++++----- pandas/tests/scalar/timestamp/test_timestamp.py | 5 +++++ pandas/tests/scalar/timestamp/test_timezones.py | 3 ++- pandas/tests/series/methods/test_align.py | 7 ++++--- pandas/tests/series/methods/test_tz_localize.py | 4 +++- pandas/tests/series/test_arithmetic.py | 8 +++++--- pandas/tests/tools/test_to_datetime.py | 2 +- 13 files changed, 57 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 98ab27f098f04..6105f96a3b1b8 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -164,6 +164,8 @@ cpdef inline tzinfo maybe_get_tz(object tz): hours = int(tz[3:6]) minutes = int(tz[3] + tz[7:9]) tz = timezone(timedelta(hours=hours, minutes=minutes)) + elif tz == "UTC" or tz == "utc": + tz = utc_stdlib else: tz = pytz.timezone(tz) elif is_integer_object(tz): diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 88ff8c699cc49..5417a9d1d451b 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -13,6 
+13,7 @@ import warnings from pandas._libs.json import loads +from pandas._libs.tslibs import timezones from pandas._typing import ( DtypeObj, JSONSerializable, @@ -140,7 +141,11 @@ def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]: elif is_period_dtype(dtype): field["freq"] = dtype.freq.freqstr elif is_datetime64tz_dtype(dtype): - field["tz"] = dtype.tz.zone + if timezones.is_utc(dtype.tz): + # timezone.utc has no "zone" attr + field["tz"] = "UTC" + else: + field["tz"] = dtype.tz.zone elif is_extension_array_dtype(dtype): field["extDtype"] = dtype.name return field diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 575db40f171a2..73c79996e5b81 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -1,6 +1,7 @@ +from datetime import timezone + import numpy as np import pytest -import pytz import pandas as pd from pandas import ( @@ -27,17 +28,17 @@ def test_frame_align_aware(self): # frame with frame df1_central = df1.tz_convert("US/Central") new1, new2 = df1.align(df1_central) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC + assert new1.index.tz is timezone.utc + assert new2.index.tz is timezone.utc # frame with Series new1, new2 = df1.align(df1_central[0], axis=0) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC + assert new1.index.tz is timezone.utc + assert new2.index.tz is timezone.utc df1[0].align(df1_central, axis=0) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC + assert new1.index.tz is timezone.utc + assert new2.index.tz is timezone.utc def test_align_float(self, float_frame): af, bf = float_frame.align(float_frame) diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py index 43c6eb4594f28..e34b21a73453c 100644 --- a/pandas/tests/frame/methods/test_tz_localize.py +++ 
b/pandas/tests/frame/methods/test_tz_localize.py @@ -1,3 +1,5 @@ +from datetime import timezone + import numpy as np import pytest @@ -23,7 +25,7 @@ def test_tz_localize(self, frame_or_series): expected = DataFrame({"a": 1}, rng.tz_localize("UTC")) expected = tm.get_obj(expected, frame_or_series) - assert result.index.tz.zone == "UTC" + assert result.index.tz is timezone.utc tm.assert_equal(result, expected) def test_tz_localize_axis1(self): @@ -33,7 +35,7 @@ def test_tz_localize_axis1(self): df = df.T result = df.tz_localize("utc", axis=1) - assert result.columns.tz.zone == "UTC" + assert result.columns.tz is timezone.utc expected = DataFrame({"a": 1}, rng.tz_localize("UTC")) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 8aedac036c2c9..623ee0461b7e8 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1,5 +1,8 @@ from collections import deque -from datetime import datetime +from datetime import ( + datetime, + timezone, +) from enum import Enum import functools import operator @@ -7,7 +10,6 @@ import numpy as np import pytest -import pytz import pandas.util._test_decorators as td @@ -1210,10 +1212,10 @@ def test_frame_add_tz_mismatch_converts_to_utc(self): df_moscow = df.tz_convert("Europe/Moscow") result = df + df_moscow - assert result.index.tz is pytz.utc + assert result.index.tz is timezone.utc result = df_moscow + df - assert result.index.tz is pytz.utc + assert result.index.tz is timezone.utc def test_align_frame(self): rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index 9afeb7ce924df..3739d247e9a2d 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -1,4 +1,7 @@ -from datetime import datetime +from datetime import ( + datetime, + timezone, +) import numpy as np import pytest @@ -71,7 +74,7 @@ 
def test_join_utc_convert(self, join_type): result = left.join(right[:-5], how=join_type) assert isinstance(result, DatetimeIndex) - assert result.tz.zone == "UTC" + assert result.tz is timezone.utc def test_datetimeindex_union_join_empty(self, sort): dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index e8bb1252c3033..ecc8da512a95c 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -6,6 +6,7 @@ datetime, time, timedelta, + timezone, tzinfo, ) @@ -1059,12 +1060,12 @@ def test_dti_to_pydatetime(self): arr = np.array([dt], dtype=object) result = to_datetime(arr, utc=True) - assert result.tz is pytz.utc + assert result.tz is timezone.utc rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal()) arr = rng.to_pydatetime() result = to_datetime(arr, utc=True) - assert result.tz is pytz.utc + assert result.tz is timezone.utc def test_dti_to_pydatetime_fizedtz(self): dates = np.array( @@ -1139,7 +1140,7 @@ def test_dti_convert_tz_aware_datetime_datetime(self, tz): converted = to_datetime(dates_aware, utc=True) ex_vals = np.array([Timestamp(x).as_unit("ns").value for x in dates_aware]) tm.assert_numpy_array_equal(converted.asi8, ex_vals) - assert converted.tz is pytz.utc + assert converted.tz is timezone.utc # Note: not difference, as there is no symmetry requirement there @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"]) @@ -1158,8 +1159,8 @@ def test_dti_setop_aware(self, setop): tm.assert_index_equal(result, expected) assert result.tz == left.tz if len(result): - assert result[0].tz.zone == "UTC" - assert result[-1].tz.zone == "UTC" + assert result[0].tz is timezone.utc + assert result[-1].tz is timezone.utc def test_dti_union_mixed(self): # GH 21671 diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py 
b/pandas/tests/scalar/timestamp/test_timestamp.py index a66567dabfa99..0384417771056 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -211,6 +211,11 @@ def test_resolution(self): class TestTimestamp: + def test_default_to_stdlib_utc(self): + assert Timestamp.utcnow().tz is timezone.utc + assert Timestamp.now("UTC").tz is timezone.utc + assert Timestamp("2016-01-01", tz="UTC").tz is timezone.utc + def test_tz(self): tstr = "2014-02-01 09:00" ts = Timestamp(tstr) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 3ebffaad23910..1c732252ddb4b 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -5,6 +5,7 @@ date, datetime, timedelta, + timezone, ) import dateutil @@ -373,7 +374,7 @@ def test_tz_convert_utc_with_system_utc(self): def test_timestamp_constructor_tz_utc(self): utc_stamp = Timestamp("3/11/2012 05:00", tz="utc") - assert utc_stamp.tzinfo is pytz.utc + assert utc_stamp.tzinfo is timezone.utc assert utc_stamp.hour == 5 utc_stamp = Timestamp("3/11/2012 05:00").tz_localize("utc") diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index 33e2b1ccecf2d..f3fc46e1e39af 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -1,6 +1,7 @@ +from datetime import timezone + import numpy as np import pytest -import pytz import pandas as pd from pandas import ( @@ -174,8 +175,8 @@ def test_align_dt64tzindex_mismatched_tzs(): # different timezones convert to UTC new1, new2 = ser.align(ser_central) - assert new1.index.tz == pytz.UTC - assert new2.index.tz == pytz.UTC + assert new1.index.tz is timezone.utc + assert new2.index.tz is timezone.utc def test_align_periodindex(join_type): diff --git a/pandas/tests/series/methods/test_tz_localize.py 
b/pandas/tests/series/methods/test_tz_localize.py index a9e28bfeeb76b..b71e7ed5500c3 100644 --- a/pandas/tests/series/methods/test_tz_localize.py +++ b/pandas/tests/series/methods/test_tz_localize.py @@ -1,3 +1,5 @@ +from datetime import timezone + import pytest import pytz @@ -124,7 +126,7 @@ def test_series_tz_localize_empty(self, tzstr): ser = Series(dtype=object) ser2 = ser.tz_localize("utc") - assert ser2.index.tz == pytz.utc + assert ser2.index.tz == timezone.utc ser2 = ser.tz_localize(tzstr) timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 8f9164fce4977..3f31e355d466f 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,9 +1,11 @@ -from datetime import timedelta +from datetime import ( + timedelta, + timezone, +) import operator import numpy as np import pytest -import pytz from pandas._libs.tslibs import IncompatibleFrequency @@ -676,7 +678,7 @@ def test_series_add_tz_mismatch_converts_to_utc(self): uts2 = ser2.tz_convert("utc") expected = uts1 + uts2 - assert result.index.tz == pytz.UTC + assert result.index.tz is timezone.utc tm.assert_series_equal(result, expected) def test_series_add_aware_naive_raises(self): diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 850c7a301858f..7697dc5f17376 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1413,7 +1413,7 @@ def test_to_datetime_utc(self): arr = np.array([parse("2012-06-13T01:39:00Z")], dtype=object) result = to_datetime(arr, utc=True) - assert result.tz is pytz.utc + assert result.tz is timezone.utc def test_to_datetime_fixed_offset(self): from pandas.tests.indexes.datetimes.test_timezones import fixed_off From 684274e5dd764c0e67e346dd1e558ece42a535a2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 12 Dec 2022 15:06:47 -0800 Subject: [PATCH 5/5] 
skip on windows --- pandas/tests/tslibs/test_timezones.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index aa10ab15f4744..4692a2086c529 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -12,6 +12,7 @@ conversion, timezones, ) +from pandas.compat import is_platform_windows from pandas import Timestamp @@ -29,7 +30,9 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(tz_name): if tz_d is None: pytest.skip(tz_name + ": dateutil does not know about this one") - assert timezones._p_tz_cache_key(tz_p) != timezones._p_tz_cache_key(tz_d) + if not (tz_name == "UTC" and is_platform_windows()): + # they both end up as tzwin("UTC") on windows + assert timezones._p_tz_cache_key(tz_p) != timezones._p_tz_cache_key(tz_d) def test_tzlocal_repr():