From b04455fb24b1e8bc9ca782b0dd726d9755f3c22f Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Fri, 18 Aug 2023 14:14:48 +0200 Subject: [PATCH 1/6] Prevent OutOfBoundsDatetime error for tz-aware series --- pandas/_libs/tslib.pyx | 4 ++-- pandas/core/arrays/datetimes.py | 8 ++++---- pandas/tests/series/test_constructors.py | 9 +++++++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 20a18cf56779f..672332fb756eb 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -681,7 +681,7 @@ cdef _array_to_datetime_object( return oresult_nd, None -def array_to_datetime_with_tz(ndarray values, tzinfo tz): +def array_to_datetime_with_tz(ndarray values, tzinfo tz, unit="ns"): """ Vectorized analogue to pd.Timestamp(value, tz=tz) @@ -717,7 +717,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz): else: # datetime64, tznaive pydatetime, int, float ts = ts.tz_localize(tz) - ts = ts.as_unit("ns") + ts = ts.as_unit(unit) ival = ts._value # Analogous to: result[i] = ival diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8ad51e4a90027..dcb0775d94d00 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -336,7 +336,7 @@ def _from_sequence_not_strict( # DatetimeTZDtype unit = dtype.unit - subarr, tz, inferred_freq = _sequence_to_dt64ns( + subarr, tz, inferred_freq = _sequence_to_dt64( data, copy=copy, tz=tz, @@ -2156,7 +2156,7 @@ def std( # Constructor Helpers -def _sequence_to_dt64ns( +def _sequence_to_dt64( data, *, copy: bool = False, @@ -2218,8 +2218,8 @@ def _sequence_to_dt64ns( elif tz is not None and ambiguous == "raise": # TODO: yearfirst/dayfirst/etc? 
obj_data = np.asarray(data, dtype=object) - i8data = tslib.array_to_datetime_with_tz(obj_data, tz) - return i8data.view(DT64NS_DTYPE), tz, None + i8data = tslib.array_to_datetime_with_tz(obj_data, tz, out_unit) + return i8data.view(out_dtype), tz, None else: # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 611f4a7f790a6..2728d762de6a1 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1148,6 +1148,15 @@ def test_constructor_with_datetime_tz(self): result = DatetimeIndex(s, freq="infer") tm.assert_index_equal(result, dr) + def test_constructor_with_datetime_tz_ms(self): + # explicit frequency + result = Series([Timestamp("2999-01-01")], dtype="datetime64[ms, US/Pacific]") + expected = Series( + np.array(["2999-01-01"], dtype="datetime64[ms]") + ).dt.tz_localize("US/Pacific") + tm.assert_series_equal(result, expected) + assert result.dtype == "datetime64[ms, US/Pacific]" + def test_constructor_with_datetime_tz4(self): # inference s = Series( From 9a5418278d88f79f494786f923e2e4f08d419bf4 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Fri, 18 Aug 2023 15:19:57 +0200 Subject: [PATCH 2/6] prevent unit=None from being passed to array_to_datetime_with_tz --- pandas/core/arrays/datetimes.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index dcb0775d94d00..f4ba559af2d99 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2182,7 +2182,8 @@ def _sequence_to_dt64( Returns ------- result : numpy.ndarray - The sequence converted to a numpy array with dtype ``datetime64[ns]``. + The sequence converted to a numpy array with dtype ``datetime64[unit]``. + Where `unit` is ns unless specified otherwise. 
tz : tzinfo or None Either the user-provided tzinfo or one inferred from the data. inferred_freq : Tick or None @@ -2205,9 +2206,9 @@ def _sequence_to_dt64( data, copy = maybe_convert_dtype(data, copy, tz=tz) data_dtype = getattr(data, "dtype", None) - out_dtype = DT64NS_DTYPE - if out_unit is not None: - out_dtype = np.dtype(f"M8[{out_unit}]") + if out_unit is None: + out_unit = "ns" + out_dtype = np.dtype(f"M8[{out_unit}]") if data_dtype == object or is_string_dtype(data_dtype): # TODO: We do not have tests specific to string-dtypes, From 263f1f2645238cca0f88f376a0b185d867e8afa3 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Fri, 18 Aug 2023 16:28:10 +0200 Subject: [PATCH 3/6] Rename occurrences of _sequence_to_dt64ns --- pandas/tests/arrays/datetimes/test_constructors.py | 10 +++++----- pandas/tests/arrays/test_datetimelike.py | 4 ++-- pandas/tests/test_downstream.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index 30f47e37fedf5..fe3d70ef744d2 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -8,7 +8,7 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays import DatetimeArray -from pandas.core.arrays.datetimes import _sequence_to_dt64ns +from pandas.core.arrays.datetimes import _sequence_to_dt64 class TestDatetimeArrayConstructor: @@ -44,7 +44,7 @@ def test_freq_validation(self): "meth", [ DatetimeArray._from_sequence, - _sequence_to_dt64ns, + _sequence_to_dt64, pd.to_datetime, pd.DatetimeIndex, ], @@ -105,7 +105,7 @@ def test_bool_dtype_raises(self): DatetimeArray._from_sequence(arr) with pytest.raises(TypeError, match=msg): - _sequence_to_dt64ns(arr) + _sequence_to_dt64(arr) with pytest.raises(TypeError, match=msg): pd.DatetimeIndex(arr) @@ -160,8 +160,8 @@ def test_2d(self, order): if order == "F": arr = arr.T - res = 
_sequence_to_dt64ns(arr) - expected = _sequence_to_dt64ns(arr.ravel()) + res = _sequence_to_dt64(arr) + expected = _sequence_to_dt64(arr.ravel()) tm.assert_numpy_array_equal(res[0].ravel(), expected[0]) assert res[1] == expected[1] diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 9eee2e0bea687..733c4957e0f75 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -26,7 +26,7 @@ PeriodArray, TimedeltaArray, ) -from pandas.core.arrays.datetimes import _sequence_to_dt64ns +from pandas.core.arrays.datetimes import _sequence_to_dt64 from pandas.core.arrays.timedeltas import sequence_to_td64ns @@ -1313,7 +1313,7 @@ def test_from_pandas_array(dtype): expected = cls._from_sequence(data) tm.assert_extension_array_equal(result, expected) - func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] + func = {"M8[ns]": _sequence_to_dt64, "m8[ns]": sequence_to_td64ns}[dtype] result = func(arr)[0] expected = func(data)[0] tm.assert_equal(result, expected) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index c541c5792ec7c..f69f9cf1ad661 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -23,7 +23,7 @@ DatetimeArray, TimedeltaArray, ) -from pandas.core.arrays.datetimes import _sequence_to_dt64ns +from pandas.core.arrays.datetimes import _sequence_to_dt64 from pandas.core.arrays.timedeltas import sequence_to_td64ns @@ -314,7 +314,7 @@ def test_from_obscure_array(dtype, array_likes): result = cls._from_sequence(data) tm.assert_extension_array_equal(result, expected) - func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype] + func = {"M8[ns]": _sequence_to_dt64, "m8[ns]": sequence_to_td64ns}[dtype] result = func(arr)[0] expected = func(data)[0] tm.assert_equal(result, expected) From 68857be4cb90ca6292a27eec65819e218a8f6afa Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: 
Mon, 21 Aug 2023 13:37:06 +0100 Subject: [PATCH 4/6] Edit typing stubs for array_to_datetime_with_tz --- pandas/_libs/tslib.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index 9819b5173db56..35a5c2626f102 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -28,5 +28,5 @@ def array_to_datetime( # returned ndarray may be object dtype or datetime64[ns] def array_to_datetime_with_tz( - values: npt.NDArray[np.object_], tz: tzinfo + values: npt.NDArray[np.object_], tz: tzinfo, unit: str = ... ) -> npt.NDArray[np.int64]: ... From 03900459ca4fe1a3e9ee2fded615deb4e81679ea Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Tue, 10 Oct 2023 12:05:52 +0100 Subject: [PATCH 5/6] Address GH PR comments --- pandas/core/arrays/datetimes.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ec96ae67153fb..5e1abfb2fa2b9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2187,7 +2187,7 @@ def _sequence_to_dt64( ------- result : numpy.ndarray The sequence converted to a numpy array with dtype ``datetime64[unit]``. - Where `unit` is ns unless specified otherwise. + Where `unit` is ns unless specified otherwise by `out_unit`. tz : tzinfo or None Either the user-provided tzinfo or one inferred from the data. 
inferred_freq : Tick or None diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a732be2bfc364..60a347d11fe9c 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1149,7 +1149,7 @@ def test_constructor_with_datetime_tz(self): tm.assert_index_equal(result, dr) def test_constructor_with_datetime_tz_ms(self): - # explicit frequency + # GH#54620 explicit frequency result = Series([Timestamp("2999-01-01")], dtype="datetime64[ms, US/Pacific]") expected = Series( np.array(["2999-01-01"], dtype="datetime64[ms]") From 4527313dc5a4c647e5031145faf80c404667c044 Mon Sep 17 00:00:00 2001 From: Adrian D'Alessandro Date: Tue, 10 Oct 2023 12:24:13 +0100 Subject: [PATCH 6/6] Add entry to whatsnew --- doc/source/whatsnew/v2.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index 569864ba71122..156f09dd2f06e 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -29,8 +29,8 @@ Bug fixes - Fixed bug in :meth:`Index.insert` raising when inserting ``None`` into :class:`Index` with ``dtype="string[pyarrow_numpy]"`` (:issue:`55365`) - Fixed bug in :meth:`Series.all` and :meth:`Series.any` not treating missing values correctly for ``dtype="string[pyarrow_numpy]"`` (:issue:`55367`) - Fixed bug in :meth:`Series.rank` for ``string[pyarrow_numpy]`` dtype (:issue:`55362`) +- Fixed bug in constructing :class:`Series` when dtype is a timezone aware datetime with non-nanosecond resolution raising ``OutOfBoundsDatetime`` (:issue:`54620`) - Silence ``Period[B]`` warnings introduced by :issue:`53446` during normal plotting activity (:issue:`55138`) -- .. --------------------------------------------------------------------------- .. _whatsnew_212.other: