Skip to content

Commit 386a1eb

Browse files
authored
BUG: OutOfBoundsDatetime with non-nano dt64tz dtype (#55768)
1 parent 4b24974 commit 386a1eb

File tree

6 files changed

+29
-19
lines changed

6 files changed

+29
-19
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ Datetimelike
330330
- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
331331
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
332332
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
333+
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond :class:`DatetimeTZDtype` and inputs that would be out of bounds with nanosecond resolution incorrectly raising ``OutOfBoundsDatetime`` (:issue:`54620`)
333334
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
334335
-
335336

pandas/_libs/tslib.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,5 @@ def array_to_datetime(
2929
# returned ndarray may be object dtype or datetime64[ns]
3030

3131
def array_to_datetime_with_tz(
32-
values: npt.NDArray[np.object_], tz: tzinfo
32+
values: npt.NDArray[np.object_], tz: tzinfo, creso: int
3333
) -> npt.NDArray[np.int64]: ...

pandas/_libs/tslib.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ cdef _array_to_datetime_object(
671671
return oresult_nd, None
672672

673673

674-
def array_to_datetime_with_tz(ndarray values, tzinfo tz):
674+
def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso):
675675
"""
676676
Vectorized analogue to pd.Timestamp(value, tz=tz)
677677
@@ -707,7 +707,7 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz):
707707
else:
708708
# datetime64, tznaive pydatetime, int, float
709709
ts = ts.tz_localize(tz)
710-
ts = ts.as_unit("ns")
710+
ts = (<_Timestamp>ts)._as_creso(creso)
711711
ival = ts._value
712712

713713
# Analogous to: result[i] = ival

pandas/core/arrays/datetimes.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ def _from_sequence_not_strict(
355355
# DatetimeTZDtype
356356
unit = dtype.unit
357357

358-
subarr, tz, inferred_freq = _sequence_to_dt64ns(
358+
subarr, tz, inferred_freq = _sequence_to_dt64(
359359
data,
360360
copy=copy,
361361
tz=tz,
@@ -2179,7 +2179,7 @@ def std(
21792179
# Constructor Helpers
21802180

21812181

2182-
def _sequence_to_dt64ns(
2182+
def _sequence_to_dt64(
21832183
data,
21842184
*,
21852185
copy: bool = False,
@@ -2205,7 +2205,8 @@ def _sequence_to_dt64ns(
22052205
Returns
22062206
-------
22072207
result : numpy.ndarray
2208-
The sequence converted to a numpy array with dtype ``datetime64[ns]``.
2208+
The sequence converted to a numpy array with dtype ``datetime64[unit]``.
2209+
Where `unit` is "ns" unless specified otherwise by `out_unit`.
22092210
tz : tzinfo or None
22102211
Either the user-provided tzinfo or one inferred from the data.
22112212
inferred_freq : Tick or None
@@ -2228,9 +2229,9 @@ def _sequence_to_dt64ns(
22282229
data, copy = maybe_convert_dtype(data, copy, tz=tz)
22292230
data_dtype = getattr(data, "dtype", None)
22302231

2231-
out_dtype = DT64NS_DTYPE
2232-
if out_unit is not None:
2233-
out_dtype = np.dtype(f"M8[{out_unit}]")
2232+
if out_unit is None:
2233+
out_unit = "ns"
2234+
out_dtype = np.dtype(f"M8[{out_unit}]")
22342235

22352236
if data_dtype == object or is_string_dtype(data_dtype):
22362237
# TODO: We do not have tests specific to string-dtypes,
@@ -2241,8 +2242,10 @@ def _sequence_to_dt64ns(
22412242
elif tz is not None and ambiguous == "raise":
22422243
# TODO: yearfirst/dayfirst/etc?
22432244
obj_data = np.asarray(data, dtype=object)
2244-
i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
2245-
return i8data.view(DT64NS_DTYPE), tz, None
2245+
i8data = tslib.array_to_datetime_with_tz(
2246+
obj_data, tz, abbrev_to_npy_unit(out_unit)
2247+
)
2248+
return i8data.view(out_dtype), tz, None
22462249
else:
22472250
# data comes back here as either i8 to denote UTC timestamps
22482251
# or M8[ns] to denote wall times

pandas/tests/indexes/datetimes/test_constructors.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1013,16 +1013,19 @@ def test_dti_convert_datetime_list(self, tzstr):
10131013
dr2 = DatetimeIndex(list(dr), name="foo", freq="D")
10141014
tm.assert_index_equal(dr, dr2)
10151015

1016-
def test_dti_constructor_with_non_nano_dtype(self):
1017-
# GH#55756
1016+
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
1017+
def test_dti_constructor_with_non_nano_dtype(self, tz):
1018+
# GH#55756, GH#54620
10181019
ts = Timestamp("2999-01-01")
10191020
dtype = "M8[us]"
1021+
if tz is not None:
1022+
dtype = f"M8[us, {tz}]"
10201023
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
10211024
# to 2 microseconds
10221025
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
10231026
result = DatetimeIndex(vals, dtype=dtype)
1024-
exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
1025-
expected = DatetimeIndex(exp_arr, dtype=dtype)
1027+
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
1028+
expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz)
10261029
tm.assert_index_equal(result, expected)
10271030

10281031
result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)

pandas/tests/series/methods/test_astype.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -107,18 +107,21 @@ def test_astype_dict_like(self, dtype_class):
107107

108108

109109
class TestAstype:
110-
def test_astype_object_to_dt64_non_nano(self):
111-
# GH#55756
110+
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
111+
def test_astype_object_to_dt64_non_nano(self, tz):
112+
# GH#55756, GH#54620
112113
ts = Timestamp("2999-01-01")
113114
dtype = "M8[us]"
115+
if tz is not None:
116+
dtype = f"M8[us, {tz}]"
114117
# NB: the 2500 is interpreted as nanoseconds and rounded *down*
115118
# to 2 microseconds
116119
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
117120
ser = Series(vals, dtype=object)
118121
result = ser.astype(dtype)
119122

120-
exp_arr = np.array([ts.asm8, vals[1], 2], dtype=dtype)
121-
expected = Series(exp_arr, dtype=dtype)
123+
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
124+
expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz)
122125
tm.assert_series_equal(result, expected)
123126

124127
def test_astype_mixed_object_to_dt64tz(self):

0 commit comments

Comments
 (0)