Skip to content

BUG: Prevent OutOfBoundsDatetime error for constructing tz-aware series from list #54620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Bug fixes
- Fixed bug in :meth:`Series.floordiv` for :class:`ArrowDtype` (:issue:`55561`)
- Fixed bug in :meth:`Series.rank` for ``string[pyarrow_numpy]`` dtype (:issue:`55362`)
- Fixed bug in :meth:`Series.str.extractall` for :class:`ArrowDtype` dtype being converted to object (:issue:`53846`)
- Fixed bug in constructing :class:`Series` when dtype is a timezone-aware datetime with non-nanosecond resolution raising ``OutOfBoundsDatetime`` (:issue:`54620`)
- Silence ``Period[B]`` warnings introduced by :issue:`53446` during normal plotting activity (:issue:`55138`)

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ def array_to_datetime(
# returned ndarray may be object dtype or datetime64[ns]

def array_to_datetime_with_tz(
values: npt.NDArray[np.object_], tz: tzinfo
values: npt.NDArray[np.object_], tz: tzinfo, unit: str = ...
) -> npt.NDArray[np.int64]: ...
4 changes: 2 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ cdef _array_to_datetime_object(
return oresult_nd, None


def array_to_datetime_with_tz(ndarray values, tzinfo tz):
def array_to_datetime_with_tz(ndarray values, tzinfo tz, unit="ns"):
"""
Vectorized analogue to pd.Timestamp(value, tz=tz)

Expand Down Expand Up @@ -714,7 +714,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
else:
# datetime64, tznaive pydatetime, int, float
ts = ts.tz_localize(tz)
ts = ts.as_unit("ns")
ts = ts.as_unit(unit)
ival = ts._value

# Analogous to: result[i] = ival
Expand Down
17 changes: 9 additions & 8 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def _from_sequence_not_strict(
# DatetimeTZDtype
unit = dtype.unit

subarr, tz, inferred_freq = _sequence_to_dt64ns(
subarr, tz, inferred_freq = _sequence_to_dt64(
data,
copy=copy,
tz=tz,
Expand Down Expand Up @@ -2172,7 +2172,7 @@ def std(
# Constructor Helpers


def _sequence_to_dt64ns(
def _sequence_to_dt64(
data,
*,
copy: bool = False,
Expand All @@ -2198,7 +2198,8 @@ def _sequence_to_dt64ns(
Returns
-------
result : numpy.ndarray
The sequence converted to a numpy array with dtype ``datetime64[ns]``.
The sequence converted to a numpy array with dtype ``datetime64[unit]``,
where `unit` is "ns" unless specified otherwise by `out_unit`.
tz : tzinfo or None
Either the user-provided tzinfo or one inferred from the data.
inferred_freq : Tick or None
Expand All @@ -2221,9 +2222,9 @@ def _sequence_to_dt64ns(
data, copy = maybe_convert_dtype(data, copy, tz=tz)
data_dtype = getattr(data, "dtype", None)

out_dtype = DT64NS_DTYPE
if out_unit is not None:
out_dtype = np.dtype(f"M8[{out_unit}]")
if out_unit is None:
out_unit = "ns"
out_dtype = np.dtype(f"M8[{out_unit}]")

if data_dtype == object or is_string_dtype(data_dtype):
# TODO: We do not have tests specific to string-dtypes,
Expand All @@ -2234,8 +2235,8 @@ def _sequence_to_dt64ns(
elif tz is not None and ambiguous == "raise":
# TODO: yearfirst/dayfirst/etc?
obj_data = np.asarray(data, dtype=object)
i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
return i8data.view(DT64NS_DTYPE), tz, None
i8data = tslib.array_to_datetime_with_tz(obj_data, tz, out_unit)
return i8data.view(out_dtype), tz, None
else:
# data comes back here as either i8 to denote UTC timestamps
# or M8[ns] to denote wall times
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/arrays/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays.datetimes import _sequence_to_dt64ns
from pandas.core.arrays.datetimes import _sequence_to_dt64


class TestDatetimeArrayConstructor:
Expand Down Expand Up @@ -44,7 +44,7 @@ def test_freq_validation(self):
"meth",
[
DatetimeArray._from_sequence,
_sequence_to_dt64ns,
_sequence_to_dt64,
pd.to_datetime,
pd.DatetimeIndex,
],
Expand Down Expand Up @@ -105,7 +105,7 @@ def test_bool_dtype_raises(self):
DatetimeArray._from_sequence(arr)

with pytest.raises(TypeError, match=msg):
_sequence_to_dt64ns(arr)
_sequence_to_dt64(arr)

with pytest.raises(TypeError, match=msg):
pd.DatetimeIndex(arr)
Expand Down Expand Up @@ -160,8 +160,8 @@ def test_2d(self, order):
if order == "F":
arr = arr.T

res = _sequence_to_dt64ns(arr)
expected = _sequence_to_dt64ns(arr.ravel())
res = _sequence_to_dt64(arr)
expected = _sequence_to_dt64(arr.ravel())

tm.assert_numpy_array_equal(res[0].ravel(), expected[0])
assert res[1] == expected[1]
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.datetimes import _sequence_to_dt64ns
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we no longer test this function directly

from pandas.core.arrays.datetimes import _sequence_to_dt64
from pandas.core.arrays.timedeltas import sequence_to_td64ns


Expand Down Expand Up @@ -1314,7 +1314,7 @@ def test_from_pandas_array(dtype):
expected = cls._from_sequence(data)
tm.assert_extension_array_equal(result, expected)

func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype]
func = {"M8[ns]": _sequence_to_dt64, "m8[ns]": sequence_to_td64ns}[dtype]
result = func(arr)[0]
expected = func(data)[0]
tm.assert_equal(result, expected)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1148,6 +1148,15 @@ def test_constructor_with_datetime_tz(self):
result = DatetimeIndex(s, freq="infer")
tm.assert_index_equal(result, dr)

def test_constructor_with_datetime_tz_ms(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this will also fix `.astype` to non-nano. Can you add a test for that?

# GH#54620 explicit non-nanosecond unit with a timezone-aware dtype
result = Series([Timestamp("2999-01-01")], dtype="datetime64[ms, US/Pacific]")
expected = Series(
np.array(["2999-01-01"], dtype="datetime64[ms]")
).dt.tz_localize("US/Pacific")
tm.assert_series_equal(result, expected)
assert result.dtype == "datetime64[ms, US/Pacific]"

def test_constructor_with_datetime_tz4(self):
# inference
s = Series(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
DatetimeArray,
TimedeltaArray,
)
from pandas.core.arrays.datetimes import _sequence_to_dt64ns
from pandas.core.arrays.datetimes import _sequence_to_dt64
from pandas.core.arrays.timedeltas import sequence_to_td64ns


Expand Down Expand Up @@ -316,7 +316,7 @@ def test_from_obscure_array(dtype, array_likes):
result = cls._from_sequence(data)
tm.assert_extension_array_equal(result, expected)

func = {"M8[ns]": _sequence_to_dt64ns, "m8[ns]": sequence_to_td64ns}[dtype]
func = {"M8[ns]": _sequence_to_dt64, "m8[ns]": sequence_to_td64ns}[dtype]
result = func(arr)[0]
expected = func(data)[0]
tm.assert_equal(result, expected)
Expand Down