Skip to content

Commit 98323ee

Browse files
authored
ENH: retain reso in Timestamp(dt64_obj) (#49008)
* BUG: Timestamp.__add__(np_dt64_obj) result resolution * ENH: retain reso in Timestamp(dt64_obj) * GH ref * update GH ref * troubleshoot npdev build * implement _TSObject.ensure_reso * troubleshoot npdev build * troubleshoot npdev
1 parent 20bbd12 commit 98323ee

File tree

11 files changed

+106
-73
lines changed

11 files changed

+106
-73
lines changed

doc/source/whatsnew/v1.6.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ Other API changes
121121
- Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`)
122122
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
123123
- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
124+
- Passing a ``np.datetime64`` object with non-nanosecond resolution to :class:`Timestamp` will retain the input resolution if it is "s", "ms", "us", or "ns"; otherwise it will be cast to the closest supported resolution (:issue:`49008`)
124125
-
125126

126127
.. ---------------------------------------------------------------------------

pandas/_libs/tslib.pyx

+3-1
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ cpdef array_to_datetime(
514514
found_tz = True
515515
if utc_convert:
516516
_ts = convert_datetime_to_tsobject(val, None)
517+
_ts.ensure_reso(NPY_FR_ns)
517518
iresult[i] = _ts.value
518519
elif found_naive:
519520
raise ValueError('Tz-aware datetime.datetime '
@@ -527,6 +528,7 @@ cpdef array_to_datetime(
527528
found_tz = True
528529
tz_out = val.tzinfo
529530
_ts = convert_datetime_to_tsobject(val, None)
531+
_ts.ensure_reso(NPY_FR_ns)
530532
iresult[i] = _ts.value
531533

532534
else:
@@ -535,7 +537,7 @@ cpdef array_to_datetime(
535537
raise ValueError('Cannot mix tz-aware with '
536538
'tz-naive values')
537539
if isinstance(val, _Timestamp):
538-
iresult[i] = val.value
540+
iresult[i] = val._as_unit("ns").value
539541
else:
540542
iresult[i] = pydatetime_to_dt64(val, &dts)
541543
check_dts_bounds(&dts)

pandas/_libs/tslibs/conversion.pxd

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ cdef class _TSObject:
2020
int64_t value # numpy dt64
2121
tzinfo tzinfo
2222
bint fold
23+
NPY_DATETIMEUNIT reso
24+
25+
cdef void ensure_reso(self, NPY_DATETIMEUNIT reso)
2326

2427

2528
cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,

pandas/_libs/tslibs/conversion.pyx

+14-3
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ import_datetime()
3131
from pandas._libs.tslibs.base cimport ABCTimestamp
3232
from pandas._libs.tslibs.dtypes cimport (
3333
abbrev_to_npy_unit,
34+
get_supported_reso,
3435
periods_per_second,
3536
)
3637
from pandas._libs.tslibs.np_datetime cimport (
3738
NPY_DATETIMEUNIT,
3839
NPY_FR_ns,
3940
check_dts_bounds,
41+
convert_reso,
4042
get_datetime64_unit,
4143
get_datetime64_value,
4244
get_implementation_bounds,
@@ -204,10 +206,16 @@ cdef class _TSObject:
204206
# int64_t value # numpy dt64
205207
# tzinfo tzinfo
206208
# bint fold
209+
# NPY_DATETIMEUNIT reso
207210

208211
def __cinit__(self):
209212
# GH 25057. As per PEP 495, set fold to 0 by default
210213
self.fold = 0
214+
self.reso = NPY_FR_ns # default value
215+
216+
cdef void ensure_reso(self, NPY_DATETIMEUNIT reso):
217+
if self.reso != reso:
218+
self.value = convert_reso(self.value, self.reso, reso, False)
211219

212220

213221
cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
@@ -228,6 +236,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
228236
"""
229237
cdef:
230238
_TSObject obj
239+
NPY_DATETIMEUNIT reso
231240

232241
obj = _TSObject()
233242

@@ -237,9 +246,11 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
237246
if ts is None or ts is NaT:
238247
obj.value = NPY_NAT
239248
elif is_datetime64_object(ts):
240-
obj.value = get_datetime64_nanos(ts, NPY_FR_ns)
249+
reso = get_supported_reso(get_datetime64_unit(ts))
250+
obj.reso = reso
251+
obj.value = get_datetime64_nanos(ts, reso)
241252
if obj.value != NPY_NAT:
242-
pandas_datetime_to_datetimestruct(obj.value, NPY_FR_ns, &obj.dts)
253+
pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts)
243254
elif is_integer_object(ts):
244255
try:
245256
ts = <int64_t>ts
@@ -295,7 +306,7 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
295306
raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to '
296307
f'Timestamp')
297308

298-
maybe_localize_tso(obj, tz, NPY_FR_ns)
309+
maybe_localize_tso(obj, tz, obj.reso)
299310
return obj
300311

301312

pandas/_libs/tslibs/timestamps.pyx

+13-50
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ from pandas._libs.tslibs.util cimport (
6565
is_array,
6666
is_datetime64_object,
6767
is_integer_object,
68-
is_timedelta64_object,
6968
)
7069

7170
from pandas._libs.tslibs.fields import (
@@ -107,7 +106,6 @@ from pandas._libs.tslibs.offsets cimport (
107106
from pandas._libs.tslibs.timedeltas cimport (
108107
_Timedelta,
109108
delta_to_nanoseconds,
110-
ensure_td64ns,
111109
is_any_td_scalar,
112110
)
113111

@@ -282,6 +280,7 @@ cdef class _Timestamp(ABCTimestamp):
282280
)
283281

284282
obj.value = value
283+
obj.reso = reso
285284
pandas_datetime_to_datetimestruct(value, reso, &obj.dts)
286285
maybe_localize_tso(obj, tz, reso)
287286

@@ -432,62 +431,26 @@ cdef class _Timestamp(ABCTimestamp):
432431
int64_t nanos = 0
433432

434433
if is_any_td_scalar(other):
435-
if is_timedelta64_object(other):
436-
other_reso = get_datetime64_unit(other)
437-
if (
438-
other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
439-
):
440-
# TODO: deprecate allowing this? We only get here
441-
# with test_timedelta_add_timestamp_interval
442-
other = np.timedelta64(other.view("i8"), "ns")
443-
other_reso = NPY_DATETIMEUNIT.NPY_FR_ns
444-
elif (
445-
other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M
446-
):
447-
# TODO: deprecate allowing these? or handle more like the
448-
# corresponding DateOffsets?
449-
# TODO: no tests get here
450-
other = ensure_td64ns(other)
451-
other_reso = NPY_DATETIMEUNIT.NPY_FR_ns
452-
453-
if other_reso > NPY_DATETIMEUNIT.NPY_FR_ns:
454-
# TODO: no tests
455-
other = ensure_td64ns(other)
456-
if other_reso > self._reso:
457-
# Following numpy, we cast to the higher resolution
458-
# test_sub_timedelta64_mismatched_reso
459-
self = (<_Timestamp>self)._as_reso(other_reso)
460-
461-
462-
if isinstance(other, _Timedelta):
463-
# TODO: share this with __sub__, Timedelta.__add__
464-
# Matching numpy, we cast to the higher resolution. Unlike numpy,
465-
# we raise instead of silently overflowing during this casting.
466-
if self._reso < other._reso:
467-
self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
468-
elif self._reso > other._reso:
469-
other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)
434+
other = Timedelta(other)
470435

471-
try:
472-
nanos = delta_to_nanoseconds(
473-
other, reso=self._reso, round_ok=False
474-
)
475-
except OutOfBoundsTimedelta:
476-
raise
436+
# TODO: share this with __sub__, Timedelta.__add__
437+
# Matching numpy, we cast to the higher resolution. Unlike numpy,
438+
# we raise instead of silently overflowing during this casting.
439+
if self._reso < other._reso:
440+
self = (<_Timestamp>self)._as_reso(other._reso, round_ok=True)
441+
elif self._reso > other._reso:
442+
other = (<_Timedelta>other)._as_reso(self._reso, round_ok=True)
477443

478-
try:
479-
new_value = self.value + nanos
480-
except OverflowError:
481-
# Use Python ints
482-
# Hit in test_tdi_add_overflow
483-
new_value = int(self.value) + int(nanos)
444+
nanos = other.value
484445

485446
try:
447+
new_value = self.value + nanos
486448
result = type(self)._from_value_and_reso(
487449
new_value, reso=self._reso, tz=self.tzinfo
488450
)
489451
except OverflowError as err:
490452
# TODO: don't hard-code nanosecond here
453+
new_value = int(self.value) + int(nanos)
491454
raise OutOfBoundsDatetime(
492455
f"Out of bounds nanosecond timestamp: {new_value}"
493456
) from err
@@ -1713,7 +1676,7 @@ class Timestamp(_Timestamp):
17131676
if not is_offset_object(freq):
17141677
freq = to_offset(freq)
17151678

1716-
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold)
1679+
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold, ts.reso)
17171680

17181681
def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'):
17191682
cdef:

pandas/tests/frame/test_constructors.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -3113,14 +3113,34 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
31133113
with pytest.raises(TypeError, match=msg):
31143114
constructor(scalar, dtype=dtype)
31153115

3116+
@pytest.mark.xfail(
3117+
reason="Timestamp constructor has been updated to cast dt64 to non-nano, "
3118+
"but DatetimeArray._from_sequence has not"
3119+
)
31163120
@pytest.mark.parametrize("cls", [datetime, np.datetime64])
3117-
def test_from_out_of_bounds_datetime(self, constructor, cls):
3121+
def test_from_out_of_ns_bounds_datetime(self, constructor, cls, request):
3122+
# scalar that won't fit in nanosecond dt64, but will fit in microsecond
31183123
scalar = datetime(9999, 1, 1)
3124+
exp_dtype = "M8[us]" # smallest reso that fits
31193125
if cls is np.datetime64:
31203126
scalar = np.datetime64(scalar, "D")
3127+
exp_dtype = "M8[s]" # closest reso to input
31213128
result = constructor(scalar)
31223129

3123-
assert type(get1(result)) is cls
3130+
item = get1(result)
3131+
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]
3132+
3133+
assert type(item) is Timestamp
3134+
assert item.asm8.dtype == exp_dtype
3135+
assert dtype == exp_dtype
3136+
3137+
def test_out_of_s_bounds_datetime64(self, constructor):
3138+
scalar = np.datetime64(np.iinfo(np.int64).max, "D")
3139+
result = constructor(scalar)
3140+
item = get1(result)
3141+
assert type(item) is np.datetime64
3142+
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]
3143+
assert dtype == object
31243144

31253145
@pytest.mark.xfail(
31263146
reason="TimedeltaArray constructor has been updated to cast td64 to non-nano, "

pandas/tests/scalar/timedelta/test_arithmetic.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ def test_td_add_datetimelike_scalar(self, op):
9999
assert result is NaT
100100

101101
def test_td_add_timestamp_overflow(self):
102-
msg = "Cannot cast 259987 from D to 'ns' without overflow"
102+
msg = "Cannot cast 259987 from D to 'ns' without overflow."
103103
with pytest.raises(OutOfBoundsTimedelta, match=msg):
104104
Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D")
105105

106-
msg = "Cannot cast 259987 days, 0:00:00 to unit=ns without overflow"
106+
msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow"
107107
with pytest.raises(OutOfBoundsTimedelta, match=msg):
108108
Timestamp("1700-01-01") + timedelta(days=13 * 19999)
109109

pandas/tests/scalar/timestamp/test_arithmetic.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_overflow_offset_raises(self):
4545
r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
4646
"will overflow"
4747
)
48-
lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"
48+
lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow"
4949

5050
with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
5151
stamp + offset_overflow
@@ -62,7 +62,9 @@ def test_overflow_offset_raises(self):
6262
stamp = Timestamp("2000/1/1")
6363
offset_overflow = to_offset("D") * 100**5
6464

65-
lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow"
65+
lmsg3 = (
66+
r"Cannot cast -?10000000000 days \+?00:00:00 to unit='ns' without overflow"
67+
)
6668
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
6769
stamp + offset_overflow
6870

pandas/tests/scalar/timestamp/test_constructors.py

+29-3
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import pytest
1212
import pytz
1313

14+
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
1415
from pandas.compat import PY310
1516
from pandas.errors import OutOfBoundsDatetime
1617

@@ -455,14 +456,26 @@ def test_out_of_bounds_value(self):
455456
Timestamp(min_ts_us)
456457
Timestamp(max_ts_us)
457458

459+
# We used to raise on these before supporting non-nano
460+
us_val = NpyDatetimeUnit.NPY_FR_us.value
461+
assert Timestamp(min_ts_us - one_us)._reso == us_val
462+
assert Timestamp(max_ts_us + one_us)._reso == us_val
463+
464+
# https://github.com/numpy/numpy/issues/22346 for why
465+
# we can't use the same construction as above with minute resolution
466+
467+
# too_low, too_high are _just_ outside the range of M8[s]
468+
too_low = np.datetime64("-292277022657-01-27T08:29", "m")
469+
too_high = np.datetime64("292277026596-12-04T15:31", "m")
470+
458471
msg = "Out of bounds"
459472
# A value just below the minimum representable with M8[s] is an error
460473
with pytest.raises(ValueError, match=msg):
461-
Timestamp(min_ts_us - one_us)
474+
Timestamp(too_low)
462475

463476
# A value just above the maximum representable with M8[s] is an error
464477
with pytest.raises(ValueError, match=msg):
465-
Timestamp(max_ts_us + one_us)
478+
Timestamp(too_high)
466479

467480
def test_out_of_bounds_string(self):
468481
msg = "Out of bounds"
@@ -487,7 +500,20 @@ def test_bounds_with_different_units(self):
487500
for date_string in out_of_bounds_dates:
488501
for unit in time_units:
489502
dt64 = np.datetime64(date_string, unit)
490-
msg = "Out of bounds"
503+
ts = Timestamp(dt64)
504+
if unit in ["s", "ms", "us"]:
505+
# We can preserve the input unit
506+
assert ts.value == dt64.view("i8")
507+
else:
508+
# we chose the closest unit that we _do_ support
509+
assert ts._reso == NpyDatetimeUnit.NPY_FR_s.value
510+
511+
# With more extreme cases, we can't even fit inside second resolution
512+
info = np.iinfo(np.int64)
513+
msg = "Out of bounds nanosecond timestamp:"
514+
for value in [info.min + 1, info.max]:
515+
for unit in ["D", "h", "m"]:
516+
dt64 = np.datetime64(value, unit)
491517
with pytest.raises(OutOfBoundsDatetime, match=msg):
492518
Timestamp(dt64)
493519

pandas/tests/scalar/timestamp/test_timestamp.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ def test_cmp_cross_reso(self):
826826

827827
# subtracting 3600*24 gives a datetime64 that _can_ fit inside the
828828
# nanosecond implementation bounds.
829-
other = Timestamp(dt64 - 3600 * 24)
829+
other = Timestamp(dt64 - 3600 * 24)._as_unit("ns")
830830
assert other < ts
831831
assert other.asm8 > ts.asm8 # <- numpy gets this wrong
832832
assert ts > other
@@ -884,12 +884,7 @@ def test_to_period(self, dt64, ts):
884884
)
885885
def test_addsub_timedeltalike_non_nano(self, dt64, ts, td):
886886

887-
if isinstance(td, Timedelta):
888-
# td._reso is ns
889-
exp_reso = td._reso
890-
else:
891-
# effective td._reso is s
892-
exp_reso = ts._reso
887+
exp_reso = max(ts._reso, Timedelta(td)._reso)
893888

894889
result = ts - td
895890
expected = Timestamp(dt64) - td

0 commit comments

Comments
 (0)