Skip to content

Commit 2d2d67d

Browse files
authored
BUG: incorrect OutOfBoundsDatetime with non-nano dtype (#55756)
* BUG: incorrect OutOfBoundsDatetime with non-nano dtype
* GH ref
1 parent 0287cde commit 2d2d67d

File tree

16 files changed

+118
-39
lines changed

16 files changed

+118
-39
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ Datetimelike
330330
- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
331331
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)
332332
- Bug in addition or subtraction of very large :class:`Tick` objects with :class:`Timestamp` or :class:`Timedelta` objects raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
333+
- Bug in creating an :class:`Index`, :class:`Series`, or :class:`DataFrame` with a non-nanosecond ``datetime64`` dtype and inputs that would be out of bounds for a ``datetime64[ns]`` incorrectly raising ``OutOfBoundsDatetime`` (:issue:`55756`)
333334
-
334335

335336
Timedelta

pandas/_libs/tslib.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def array_to_datetime(
2323
dayfirst: bool = ...,
2424
yearfirst: bool = ...,
2525
utc: bool = ...,
26+
creso: int = ...,
2627
) -> tuple[np.ndarray, tzinfo | None]: ...
2728

2829
# returned ndarray may be object dtype or datetime64[ns]

pandas/_libs/tslib.pyx

+13-8
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ from pandas._libs.tslibs.conversion cimport (
6464
get_datetime64_nanos,
6565
parse_pydatetime,
6666
)
67+
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
6768
from pandas._libs.tslibs.nattype cimport (
6869
NPY_NAT,
6970
c_NaT as NaT,
@@ -277,6 +278,7 @@ def array_with_unit_to_datetime(
277278
result, tz = array_to_datetime(
278279
values.astype(object, copy=False),
279280
errors=errors,
281+
creso=NPY_FR_ns,
280282
)
281283
return result, tz
282284

@@ -408,6 +410,7 @@ cpdef array_to_datetime(
408410
bint dayfirst=False,
409411
bint yearfirst=False,
410412
bint utc=False,
413+
NPY_DATETIMEUNIT creso=NPY_FR_ns,
411414
):
412415
"""
413416
Converts a 1D array of date-like values to a numpy array of either:
@@ -434,6 +437,7 @@ cpdef array_to_datetime(
434437
yearfirst parsing behavior when encountering datetime strings
435438
utc : bool, default False
436439
indicator whether the dates should be UTC
440+
creso : NPY_DATETIMEUNIT, default NPY_FR_ns
437441
438442
Returns
439443
-------
@@ -457,13 +461,14 @@ cpdef array_to_datetime(
457461
set out_tzoffset_vals = set()
458462
tzinfo tz_out = None
459463
cnp.flatiter it = cnp.PyArray_IterNew(values)
460-
NPY_DATETIMEUNIT creso = NPY_FR_ns
461464
DatetimeParseState state = DatetimeParseState()
465+
str reso_str
462466

463467
# specify error conditions
464468
assert is_raise or is_ignore or is_coerce
465469

466-
result = np.empty((<object>values).shape, dtype="M8[ns]")
470+
reso_str = npy_unit_to_abbrev(creso)
471+
result = np.empty((<object>values).shape, dtype=f"M8[{reso_str}]")
467472
iresult = result.view("i8").ravel()
468473

469474
for i in range(n):
@@ -480,11 +485,11 @@ cpdef array_to_datetime(
480485
iresult[i] = parse_pydatetime(val, &dts, creso=creso)
481486

482487
elif PyDate_Check(val):
483-
iresult[i] = pydate_to_dt64(val, &dts)
484-
check_dts_bounds(&dts)
488+
iresult[i] = pydate_to_dt64(val, &dts, reso=creso)
489+
check_dts_bounds(&dts, creso)
485490

486491
elif is_datetime64_object(val):
487-
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
492+
iresult[i] = get_datetime64_nanos(val, creso)
488493

489494
elif is_integer_object(val) or is_float_object(val):
490495
# these must be ns unit by-definition
@@ -493,23 +498,23 @@ cpdef array_to_datetime(
493498
iresult[i] = NPY_NAT
494499
else:
495500
# we now need to parse this as if unit='ns'
496-
iresult[i] = cast_from_unit(val, "ns")
501+
iresult[i] = cast_from_unit(val, "ns", out_reso=creso)
497502

498503
elif isinstance(val, str):
499504
# string
500505
if type(val) is not str:
501506
# GH#32264 np.str_ object
502507
val = str(val)
503508

504-
if parse_today_now(val, &iresult[i], utc):
509+
if parse_today_now(val, &iresult[i], utc, creso):
505510
# We can't _quite_ dispatch this to convert_str_to_tsobject
506511
# bc there isn't a nice way to pass "utc"
507512
continue
508513

509514
_ts = convert_str_to_tsobject(
510515
val, None, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst
511516
)
512-
_ts.ensure_reso(NPY_FR_ns, val)
517+
_ts.ensure_reso(creso, val)
513518

514519
iresult[i] = _ts.value
515520

pandas/_libs/tslibs/conversion.pxd

+3-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1
4343

4444
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
4545
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
46-
cpdef (int64_t, int) precision_from_unit(str unit, NPY_DATETIMEUNIT out_reso=*)
46+
cpdef (int64_t, int) precision_from_unit(
47+
NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
48+
)
4749

4850
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
4951

pandas/_libs/tslibs/conversion.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@ DT64NS_DTYPE: np.dtype
99
TD64NS_DTYPE: np.dtype
1010

1111
def precision_from_unit(
12-
unit: str,
12+
in_reso: int, # NPY_DATETIMEUNIT
1313
) -> tuple[int, int]: ... # (int64_t, _)
1414
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...

pandas/_libs/tslibs/conversion.pyx

+16-9
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ cdef int64_t cast_from_unit(
106106
cdef:
107107
int64_t m
108108
int p
109+
NPY_DATETIMEUNIT in_reso
109110

110111
if unit in ["Y", "M"]:
111112
if is_float_object(ts) and not ts.is_integer():
@@ -123,7 +124,14 @@ cdef int64_t cast_from_unit(
123124
dt64obj = np.datetime64(ts, unit)
124125
return get_datetime64_nanos(dt64obj, out_reso)
125126

126-
m, p = precision_from_unit(unit, out_reso)
127+
in_reso = abbrev_to_npy_unit(unit)
128+
if out_reso < in_reso and in_reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
129+
# We will end up rounding (always *down*), so don't need the fractional
130+
# part of `ts`.
131+
m, _ = precision_from_unit(out_reso, in_reso)
132+
return (<int64_t>ts) // m
133+
134+
m, p = precision_from_unit(in_reso, out_reso)
127135

128136
# cast the unit, multiply base/frac separately
129137
# to avoid precision issues from float -> int
@@ -146,8 +154,8 @@ cdef int64_t cast_from_unit(
146154
) from err
147155

148156

149-
cpdef inline (int64_t, int) precision_from_unit(
150-
str unit,
157+
cpdef (int64_t, int) precision_from_unit(
158+
NPY_DATETIMEUNIT in_reso,
151159
NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
152160
):
153161
"""
@@ -163,25 +171,24 @@ cpdef inline (int64_t, int) precision_from_unit(
163171
int64_t m
164172
int64_t multiplier
165173
int p
166-
NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit)
167174

168-
if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
169-
reso = NPY_DATETIMEUNIT.NPY_FR_ns
170-
if reso == NPY_DATETIMEUNIT.NPY_FR_Y:
175+
if in_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
176+
in_reso = NPY_DATETIMEUNIT.NPY_FR_ns
177+
if in_reso == NPY_DATETIMEUNIT.NPY_FR_Y:
171178
# each 400 years we have 97 leap years, for an average of 97/400=.2425
172179
# extra days each year. We get 31556952 by writing
173180
# 3600*24*365.2425=31556952
174181
multiplier = periods_per_second(out_reso)
175182
m = multiplier * 31556952
176-
elif reso == NPY_DATETIMEUNIT.NPY_FR_M:
183+
elif in_reso == NPY_DATETIMEUNIT.NPY_FR_M:
177184
# 2629746 comes from dividing the "Y" case by 12.
178185
multiplier = periods_per_second(out_reso)
179186
m = multiplier * 2629746
180187
else:
181188
# Careful: if get_conversion_factor raises, the exception does
182189
# not propagate, instead we get a warning about an ignored exception.
183190
# https://github.com/pandas-dev/pandas/pull/51483#discussion_r1115198951
184-
m = get_conversion_factor(reso, out_reso)
191+
m = get_conversion_factor(in_reso, out_reso)
185192

186193
p = <int>log10(m) # number of digits in 'm' minus 1
187194
return m, p

pandas/_libs/tslibs/strptime.pxd

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ from cpython.datetime cimport (
44
)
55
from numpy cimport int64_t
66

7+
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
78

8-
cdef bint parse_today_now(str val, int64_t* iresult, bint utc)
9+
10+
cdef bint parse_today_now(str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso)
911

1012

1113
cdef class DatetimeParseState:

pandas/_libs/tslibs/strptime.pyx

+12-7
Original file line numberDiff line numberDiff line change
@@ -111,22 +111,27 @@ def _test_format_is_iso(f: str) -> bool:
111111
return format_is_iso(f)
112112

113113

114-
cdef bint parse_today_now(str val, int64_t* iresult, bint utc):
114+
cdef bint parse_today_now(
115+
str val, int64_t* iresult, bint utc, NPY_DATETIMEUNIT creso
116+
):
115117
# We delay this check for as long as possible
116118
# because it catches relatively rare cases
119+
cdef:
120+
_Timestamp ts
117121

118-
# Multiply by 1000 to convert to nanos, since these methods naturally have
119-
# microsecond resolution
120122
if val == "now":
121123
if utc:
122-
iresult[0] = Timestamp.utcnow()._value * 1000
124+
ts = <_Timestamp>Timestamp.utcnow()
125+
iresult[0] = ts._as_creso(creso)._value
123126
else:
124127
# GH#18705 make sure to_datetime("now") matches Timestamp("now")
125128
# Note using Timestamp.now() is faster than Timestamp("now")
126-
iresult[0] = Timestamp.now()._value * 1000
129+
ts = <_Timestamp>Timestamp.now()
130+
iresult[0] = ts._as_creso(creso)._value
127131
return True
128132
elif val == "today":
129-
iresult[0] = Timestamp.today()._value * 1000
133+
ts = <_Timestamp>Timestamp.today()
134+
iresult[0] = ts._as_creso(creso)._value
130135
return True
131136
return False
132137

@@ -363,7 +368,7 @@ def array_strptime(
363368
check_dts_bounds(&dts)
364369
continue
365370

366-
if parse_today_now(val, &iresult[i], utc):
371+
if parse_today_now(val, &iresult[i], utc, NPY_FR_ns):
367372
continue
368373

369374
# Some ISO formats can't be parsed by string_to_dts

pandas/_libs/tslibs/timedeltas.pyx

+1-3
Original file line numberDiff line numberDiff line change
@@ -303,18 +303,16 @@ cdef object ensure_td64ns(object ts):
303303
cdef:
304304
NPY_DATETIMEUNIT td64_unit
305305
int64_t td64_value, mult
306-
str unitstr
307306

308307
td64_unit = get_datetime64_unit(ts)
309308
if (
310309
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
311310
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
312311
):
313-
unitstr = npy_unit_to_abbrev(td64_unit)
314312

315313
td64_value = cnp.get_timedelta64_value(ts)
316314

317-
mult = precision_from_unit(unitstr)[0]
315+
mult = precision_from_unit(td64_unit)[0]
318316
try:
319317
# NB: cython#1381 this cannot be *=
320318
td64_value = td64_value * mult

pandas/core/arrays/datetimes.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2251,18 +2251,19 @@ def _sequence_to_dt64ns(
22512251
dayfirst=dayfirst,
22522252
yearfirst=yearfirst,
22532253
allow_object=False,
2254+
out_unit=out_unit or "ns",
22542255
)
22552256
copy = False
22562257
if tz and inferred_tz:
22572258
# two timezones: convert to intended from base UTC repr
22582259
assert converted.dtype == "i8"
22592260
# GH#42505
22602261
# by convention, these are _already_ UTC, e.g
2261-
result = converted.view(DT64NS_DTYPE)
2262+
result = converted.view(out_dtype)
22622263

22632264
elif inferred_tz:
22642265
tz = inferred_tz
2265-
result = converted.view(DT64NS_DTYPE)
2266+
result = converted.view(out_dtype)
22662267

22672268
else:
22682269
result, _ = _construct_from_dt64_naive(
@@ -2360,6 +2361,7 @@ def objects_to_datetime64ns(
23602361
utc: bool = False,
23612362
errors: DateTimeErrorChoices = "raise",
23622363
allow_object: bool = False,
2364+
out_unit: str = "ns",
23632365
):
23642366
"""
23652367
Convert data to array of timestamps.
@@ -2375,6 +2377,7 @@ def objects_to_datetime64ns(
23752377
allow_object : bool
23762378
Whether to return an object-dtype ndarray instead of raising if the
23772379
data contains more than one timezone.
2380+
out_unit : str, default "ns"
23782381
23792382
Returns
23802383
-------
@@ -2399,6 +2402,7 @@ def objects_to_datetime64ns(
23992402
utc=utc,
24002403
dayfirst=dayfirst,
24012404
yearfirst=yearfirst,
2405+
creso=abbrev_to_npy_unit(out_unit),
24022406
)
24032407

24042408
if tz_parsed is not None:

pandas/core/arrays/timedeltas.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
to_offset,
3030
)
3131
from pandas._libs.tslibs.conversion import precision_from_unit
32+
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
3233
from pandas._libs.tslibs.fields import (
3334
get_timedelta_days,
3435
get_timedelta_field,
@@ -1078,7 +1079,7 @@ def sequence_to_td64ns(
10781079
else:
10791080
mask = np.isnan(data)
10801081
# The next few lines are effectively a vectorized 'cast_from_unit'
1081-
m, p = precision_from_unit(unit or "ns")
1082+
m, p = precision_from_unit(abbrev_to_npy_unit(unit or "ns"))
10821083
with warnings.catch_warnings():
10831084
# Suppress RuntimeWarning about All-NaN slice
10841085
warnings.filterwarnings(

pandas/core/dtypes/astype.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,10 @@ def _astype_nansafe(
105105
# then coerce to datetime64[ns] and use DatetimeArray.astype
106106

107107
if lib.is_np_dtype(dtype, "M"):
108-
from pandas import to_datetime
108+
from pandas.core.arrays import DatetimeArray
109109

110-
dti = to_datetime(arr.ravel())
111-
dta = dti._data.reshape(arr.shape)
112-
return dta.astype(dtype, copy=False)._ndarray
110+
dta = DatetimeArray._from_sequence(arr, dtype=dtype)
111+
return dta._ndarray
113112

114113
elif lib.is_np_dtype(dtype, "m"):
115114
from pandas.core.construction import ensure_wrapped_if_datetimelike

pandas/core/tools/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
timezones as libtimezones,
3232
)
3333
from pandas._libs.tslibs.conversion import precision_from_unit
34+
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
3435
from pandas._libs.tslibs.parsing import (
3536
DateParseError,
3637
guess_datetime_format,
@@ -550,7 +551,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
550551
tz_parsed = None
551552

552553
elif arg.dtype.kind == "f":
553-
mult, _ = precision_from_unit(unit)
554+
mult, _ = precision_from_unit(abbrev_to_npy_unit(unit))
554555

555556
mask = np.isnan(arg) | (arg == iNaT)
556557
fvalues = (arg * mult).astype("f8", copy=False)

pandas/tests/frame/methods/test_astype.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,12 @@ def test_astype_from_object_to_datetime_unit(self, unit):
382382
["2017-01-01", "2017-01-02", "2017-02-03"],
383383
]
384384
df = DataFrame(vals, dtype=object)
385-
with pytest.raises(TypeError, match="Cannot cast"):
385+
msg = (
386+
rf"Unexpected value for 'dtype': 'datetime64\[{unit}\]'. "
387+
r"Must be 'datetime64\[s\]', 'datetime64\[ms\]', 'datetime64\[us\]', "
388+
r"'datetime64\[ns\]' or DatetimeTZDtype"
389+
)
390+
with pytest.raises(ValueError, match=msg):
386391
df.astype(f"M8[{unit}]")
387392

388393
@pytest.mark.parametrize("unit", ["Y", "M", "W", "D", "h", "m"])

0 commit comments

Comments
 (0)