Skip to content

Commit ef0eaa4

Browse files
authored
ENH/API: preserve non-nano in to_datetime (#50369)
* ENH/API: preserve non-nano in to_datetime
* GH ref
* Fix PandasArray case
* mypy fixup
* comment about errors=ignore
* suggested edits
1 parent d7714cd commit ef0eaa4

File tree

7 files changed

+51
-28
lines changed

7 files changed

+51
-28
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -517,6 +517,7 @@ Other API changes
517517
- Passing data with dtype of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; timedelta64 data with lower resolution will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`)
518518
- Passing ``dtype`` of "timedelta64[s]", "timedelta64[ms]", or "timedelta64[us]" to :class:`TimedeltaIndex`, :class:`Series`, or :class:`DataFrame` constructors will now retain that dtype instead of casting to "timedelta64[ns]"; passing a dtype with lower resolution for :class:`Series` or :class:`DataFrame` will be cast to the lowest supported resolution "timedelta64[s]" (:issue:`49014`)
519519
- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`)
520+
- Passing ``datetime64`` values with resolution other than nanosecond to :func:`to_datetime` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`50369`)
520521
- Passing a string in ISO-8601 format to :class:`Timestamp` will retain the resolution of the parsed input if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49737`)
521522
- The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`)
522523
- Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`)

pandas/_libs/tslibs/np_datetime.pxd

+1
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ cpdef cnp.ndarray astype_overflowsafe(
106106
cnp.dtype dtype, # ndarray[datetime64[anyunit]]
107107
bint copy=*,
108108
bint round_ok=*,
109+
bint is_coerce=*,
109110
)
110111
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1
111112

pandas/_libs/tslibs/np_datetime.pyi

+1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ def astype_overflowsafe(
1313
dtype: np.dtype,
1414
copy: bool = ...,
1515
round_ok: bool = ...,
16+
is_coerce: bool = ...,
1617
) -> np.ndarray: ...
1718
def is_unitless(dtype: np.dtype) -> bool: ...
1819
def compare_mismatched_resolutions(

pandas/_libs/tslibs/np_datetime.pyx

+6-3
Original file line number | Diff line number | Diff line change
@@ -308,6 +308,7 @@ cpdef ndarray astype_overflowsafe(
308308
cnp.dtype dtype,
309309
bint copy=True,
310310
bint round_ok=True,
311+
bint is_coerce=False,
311312
):
312313
"""
313314
Convert an ndarray with datetime64[X] to datetime64[Y]
@@ -385,7 +386,9 @@ cpdef ndarray astype_overflowsafe(
385386
try:
386387
check_dts_bounds(&dts, to_unit)
387388
except OutOfBoundsDatetime as err:
388-
if is_td:
389+
if is_coerce:
390+
new_value = NPY_DATETIME_NAT
391+
elif is_td:
389392
from_abbrev = np.datetime_data(values.dtype)[0]
390393
np_val = np.timedelta64(value, from_abbrev)
391394
msg = (
@@ -395,8 +398,8 @@ cpdef ndarray astype_overflowsafe(
395398
raise OutOfBoundsTimedelta(msg) from err
396399
else:
397400
raise
398-
399-
new_value = npy_datetimestruct_to_datetime(to_unit, &dts)
401+
else:
402+
new_value = npy_datetimestruct_to_datetime(to_unit, &dts)
400403

401404
# Analogous to: iresult[i] = new_value
402405
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value

pandas/core/reshape/tile.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -483,7 +483,7 @@ def _coerce_to_type(x):
483483
if is_datetime64tz_dtype(x.dtype):
484484
dtype = x.dtype
485485
elif is_datetime64_dtype(x.dtype):
486-
x = to_datetime(x)
486+
x = to_datetime(x).astype("datetime64[ns]", copy=False)
487487
dtype = np.dtype("datetime64[ns]")
488488
elif is_timedelta64_dtype(x.dtype):
489489
x = to_timedelta(x)
@@ -527,7 +527,12 @@ def _convert_bin_to_numeric_type(bins, dtype):
527527
raise ValueError("bins must be of timedelta64 dtype")
528528
elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
529529
if bins_dtype in ["datetime", "datetime64"]:
530-
bins = to_datetime(bins).view(np.int64)
530+
bins = to_datetime(bins)
531+
if is_datetime64_dtype(bins):
532+
# As of 2.0, to_datetime may give non-nano, so we need to convert
533+
# here until the rest of this file recognizes non-nano
534+
bins = bins.astype("datetime64[ns]", copy=False)
535+
bins = bins.view(np.int64)
531536
else:
532537
raise ValueError("bins must be of datetime64 dtype")
533538

pandas/core/tools/datetimes.py

+16-2
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,9 @@
2828
Timedelta,
2929
Timestamp,
3030
astype_overflowsafe,
31+
get_unit_from_dtype,
3132
iNaT,
33+
is_supported_unit,
3234
nat_strings,
3335
parsing,
3436
timezones as libtimezones,
@@ -50,7 +52,6 @@
5052
from pandas.core.dtypes.common import (
5153
ensure_object,
5254
is_datetime64_dtype,
53-
is_datetime64_ns_dtype,
5455
is_datetime64tz_dtype,
5556
is_float,
5657
is_integer,
@@ -68,6 +69,7 @@
6869
from pandas.arrays import (
6970
DatetimeArray,
7071
IntegerArray,
72+
PandasArray,
7173
)
7274
from pandas.core import algorithms
7375
from pandas.core.algorithms import unique
@@ -384,6 +386,8 @@ def _convert_listlike_datetimes(
384386
"""
385387
if isinstance(arg, (list, tuple)):
386388
arg = np.array(arg, dtype="O")
389+
elif isinstance(arg, PandasArray):
390+
arg = np.array(arg)
387391

388392
arg_dtype = getattr(arg, "dtype", None)
389393
# these are shortcutable
@@ -395,7 +399,17 @@ def _convert_listlike_datetimes(
395399
arg = arg.tz_convert(None).tz_localize("utc")
396400
return arg
397401

398-
elif is_datetime64_ns_dtype(arg_dtype):
402+
elif is_datetime64_dtype(arg_dtype):
403+
arg_dtype = cast(np.dtype, arg_dtype)
404+
if not is_supported_unit(get_unit_from_dtype(arg_dtype)):
405+
# We go to closest supported reso, i.e. "s"
406+
arg = astype_overflowsafe(
407+
# TODO: looks like we incorrectly raise with errors=="ignore"
408+
np.asarray(arg),
409+
np.dtype("M8[s]"),
410+
is_coerce=errors == "coerce",
411+
)
412+
399413
if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
400414
return DatetimeIndex(arg, tz=tz, name=name)
401415
elif utc:

pandas/tests/tools/test_to_datetime.py

+19-21
Original file line number | Diff line number | Diff line change
@@ -1027,23 +1027,28 @@ def test_to_datetime_dt64s_and_str(self, arg, format):
10271027
@pytest.mark.parametrize(
10281028
"dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")]
10291029
)
1030-
def test_to_datetime_dt64s_out_of_bounds(self, cache, dt):
1031-
msg = "^Out of bounds nanosecond timestamp: .*, at position 0$"
1032-
with pytest.raises(OutOfBoundsDatetime, match=msg):
1033-
to_datetime(dt, errors="raise")
1030+
@pytest.mark.parametrize("errors", ["raise", "ignore", "coerce"])
1031+
def test_to_datetime_dt64s_out_of_ns_bounds(self, cache, dt, errors):
1032+
# GH#50369 We cast to the nearest supported reso, i.e. "s"
1033+
ts = to_datetime(dt, errors=errors, cache=cache)
1034+
assert isinstance(ts, Timestamp)
1035+
assert ts.unit == "s"
1036+
assert ts.asm8 == dt
10341037

1035-
# TODO(2.0): The Timestamp and to_datetime behaviors should match;
1036-
# as of 2022-09-28, the Timestamp constructor has been updated
1037-
# to cast to M8[s] but to_datetime has not
10381038
ts = Timestamp(dt)
10391039
assert ts.unit == "s"
10401040
assert ts.asm8 == dt
10411041

1042+
def test_to_datetime_dt64d_out_of_bounds(self, cache):
1043+
dt64 = np.datetime64(np.iinfo(np.int64).max, "D")
1044+
10421045
msg = "Out of bounds nanosecond timestamp"
10431046
with pytest.raises(OutOfBoundsDatetime, match=msg):
1044-
Timestamp(np.datetime64(np.iinfo(np.int64).max, "D"))
1047+
Timestamp(dt64)
1048+
with pytest.raises(OutOfBoundsDatetime, match=msg):
1049+
to_datetime(dt64, errors="raise", cache=cache)
10451050

1046-
assert to_datetime(dt, errors="coerce", cache=cache) is NaT
1051+
assert to_datetime(dt64, errors="coerce", cache=cache) is NaT
10471052

10481053
@pytest.mark.parametrize("unit", ["s", "D"])
10491054
def test_to_datetime_array_of_dt64s(self, cache, unit):
@@ -2516,23 +2521,16 @@ def test_string_na_nat_conversion_with_name(self, cache):
25162521
assert dresult.name == "foo"
25172522

25182523
@pytest.mark.parametrize(
2519-
"dtype",
2520-
[
2521-
"datetime64[h]",
2522-
"datetime64[m]",
2523-
"datetime64[s]",
2524-
"datetime64[ms]",
2525-
"datetime64[us]",
2526-
"datetime64[ns]",
2527-
],
2524+
"unit",
2525+
["h", "m", "s", "ms", "us", "ns"],
25282526
)
2529-
def test_dti_constructor_numpy_timeunits(self, cache, dtype):
2527+
def test_dti_constructor_numpy_timeunits(self, cache, unit):
25302528
# GH 9114
2529+
dtype = np.dtype(f"M8[{unit}]")
25312530
base = to_datetime(["2000-01-01T00:00", "2000-01-02T00:00", "NaT"], cache=cache)
25322531

25332532
values = base.values.astype(dtype)
25342533

2535-
unit = dtype.split("[")[-1][:-1]
25362534
if unit in ["h", "m"]:
25372535
# we cast to closest supported unit
25382536
unit = "s"
@@ -2541,7 +2539,7 @@ def test_dti_constructor_numpy_timeunits(self, cache, dtype):
25412539
assert expected.dtype == exp_dtype
25422540

25432541
tm.assert_index_equal(DatetimeIndex(values), expected)
2544-
tm.assert_index_equal(to_datetime(values, cache=cache), base)
2542+
tm.assert_index_equal(to_datetime(values, cache=cache), expected)
25452543

25462544
def test_dayfirst(self, cache):
25472545
# GH 5917

0 commit comments

Comments
 (0)