Skip to content

Commit 0a7a09c

Browse files
jbrockmendel authored and phofl committed
REF: to_datetime handle non-object cases outside cython (pandas-dev#50263)
1 parent 4f10fd5 commit 0a7a09c

File tree

3 files changed

+50
-54
lines changed

3 files changed

+50
-54
lines changed

pandas/_libs/tslib.pyi

+1 -1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ def format_array_from_datetime(
1212
reso: int = ..., # NPY_DATETIMEUNIT
1313
) -> npt.NDArray[np.object_]: ...
1414
def array_with_unit_to_datetime(
15-
values: np.ndarray,
15+
values: npt.NDArray[np.object_],
1616
unit: str,
1717
errors: str = ...,
1818
) -> tuple[np.ndarray, tzinfo | None]: ...

pandas/_libs/tslib.pyx

+5 -52
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ import_datetime()
1818

1919
cimport numpy as cnp
2020
from numpy cimport (
21-
float64_t,
2221
int64_t,
2322
ndarray,
2423
)
@@ -231,7 +230,7 @@ def format_array_from_datetime(
231230

232231

233232
def array_with_unit_to_datetime(
234-
ndarray values,
233+
ndarray[object] values,
235234
str unit,
236235
str errors="coerce"
237236
):
@@ -266,70 +265,24 @@ def array_with_unit_to_datetime(
266265
cdef:
267266
Py_ssize_t i, n=len(values)
268267
int64_t mult
269-
int prec = 0
270-
ndarray[float64_t] fvalues
271268
bint is_ignore = errors=="ignore"
272269
bint is_coerce = errors=="coerce"
273270
bint is_raise = errors=="raise"
274-
bint need_to_iterate = True
275271
ndarray[int64_t] iresult
276272
ndarray[object] oresult
277-
ndarray mask
278273
object tz = None
279274

280275
assert is_ignore or is_coerce or is_raise
281276

282277
if unit == "ns":
283-
if issubclass(values.dtype.type, (np.integer, np.float_)):
284-
result = values.astype("M8[ns]", copy=False)
285-
else:
286-
result, tz = array_to_datetime(
287-
values.astype(object, copy=False),
288-
errors=errors,
289-
)
278+
result, tz = array_to_datetime(
279+
values.astype(object, copy=False),
280+
errors=errors,
281+
)
290282
return result, tz
291283

292284
mult, _ = precision_from_unit(unit)
293285

294-
if is_raise:
295-
# try a quick conversion to i8/f8
296-
# if we have nulls that are not type-compat
297-
# then need to iterate
298-
299-
if values.dtype.kind in ["i", "f", "u"]:
300-
iresult = values.astype("i8", copy=False)
301-
# fill missing values by comparing to NPY_NAT
302-
mask = iresult == NPY_NAT
303-
# Trying to Convert NaN to integer results in undefined
304-
# behaviour, so handle it explicitly (see GH #48705)
305-
if values.dtype.kind == "f":
306-
mask |= values != values
307-
iresult[mask] = 0
308-
fvalues = iresult.astype("f8") * mult
309-
need_to_iterate = False
310-
311-
if not need_to_iterate:
312-
# check the bounds
313-
if (fvalues < Timestamp.min.value).any() or (
314-
(fvalues > Timestamp.max.value).any()
315-
):
316-
raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
317-
318-
if values.dtype.kind in ["i", "u"]:
319-
result = (iresult * mult).astype("M8[ns]")
320-
321-
elif values.dtype.kind == "f":
322-
fresult = (values * mult).astype("f8")
323-
fresult[mask] = 0
324-
if prec:
325-
fresult = round(fresult, prec)
326-
result = fresult.astype("M8[ns]", copy=False)
327-
328-
iresult = result.view("i8")
329-
iresult[mask] = NPY_NAT
330-
331-
return result, tz
332-
333286
result = np.empty(n, dtype="M8[ns]")
334287
iresult = result.view("i8")
335288

pandas/core/tools/datetimes.py

+44 -1
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,13 @@
2727
OutOfBoundsDatetime,
2828
Timedelta,
2929
Timestamp,
30+
astype_overflowsafe,
3031
iNaT,
3132
nat_strings,
3233
parsing,
3334
timezones as libtimezones,
3435
)
36+
from pandas._libs.tslibs.conversion import precision_from_unit
3537
from pandas._libs.tslibs.parsing import (
3638
DateParseError,
3739
format_is_iso,
@@ -557,7 +559,48 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
557559
tz_parsed = None
558560
else:
559561
arg = np.asarray(arg)
560-
arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
562+
563+
if arg.dtype.kind in ["i", "u"]:
564+
# Note we can't do "f" here because that could induce unwanted
565+
# rounding GH#14156, GH#20445
566+
arr = arg.astype(f"datetime64[{unit}]", copy=False)
567+
try:
568+
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
569+
except OutOfBoundsDatetime:
570+
if errors == "raise":
571+
raise
572+
arg = arg.astype(object)
573+
return _to_datetime_with_unit(arg, unit, name, utc, errors)
574+
tz_parsed = None
575+
576+
elif arg.dtype.kind == "f":
577+
mult, _ = precision_from_unit(unit)
578+
579+
iresult = arg.astype("i8")
580+
mask = np.isnan(arg) | (arg == iNaT)
581+
iresult[mask] = 0
582+
583+
fvalues = iresult.astype("f8") * mult
584+
585+
if (fvalues < Timestamp.min.value).any() or (
586+
fvalues > Timestamp.max.value
587+
).any():
588+
if errors != "raise":
589+
arg = arg.astype(object)
590+
return _to_datetime_with_unit(arg, unit, name, utc, errors)
591+
raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
592+
593+
# TODO: is fresult meaningfully different from fvalues?
594+
fresult = (arg * mult).astype("f8")
595+
fresult[mask] = 0
596+
597+
arr = fresult.astype("M8[ns]", copy=False)
598+
arr[mask] = np.datetime64("NaT", "ns")
599+
600+
tz_parsed = None
601+
else:
602+
arg = arg.astype(object, copy=False)
603+
arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
561604

562605
if errors == "ignore":
563606
# Index constructor _may_ infer to DatetimeIndex

0 commit comments

Comments
 (0)