Skip to content

Commit 729d17c

Browse files
authored
astype_overflowsafe handle timedelta64 (#47110)
* astype_overflowsafe handle timedelta64
* mypy fixup
* update exception messages, share more
* troubleshoot min-version build
1 parent b6a058b commit 729d17c

File tree

8 files changed: +91 -52 lines changed

pandas/_libs/tslibs/__init__.py

+2
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
"BaseOffset",
2727
"tz_compare",
2828
"is_unitless",
29+
"astype_overflowsafe",
2930
"get_unit_from_dtype",
3031
"periods_per_day",
3132
]
@@ -45,6 +46,7 @@
4546
from pandas._libs.tslibs.np_datetime import (
4647
OutOfBoundsDatetime,
4748
OutOfBoundsTimedelta,
49+
astype_overflowsafe,
4850
is_unitless,
4951
py_get_unit_from_dtype as get_unit_from_dtype,
5052
)

pandas/_libs/tslibs/conversion.pyx

+1 -33
Original file line number | Diff line number | Diff line change
@@ -193,8 +193,6 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
193193
return ival
194194

195195

196-
@cython.boundscheck(False)
197-
@cython.wraparound(False)
198196
def ensure_datetime64ns(arr: ndarray, copy: bool = True):
199197
"""
200198
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
@@ -213,14 +211,6 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
213211
dtype = arr.dtype
214212
arr = arr.astype(dtype.newbyteorder("<"))
215213

216-
if arr.size == 0:
217-
# Fastpath; doesn't matter but we have old tests for result.base
218-
# being arr.
219-
result = arr.view(DT64NS_DTYPE)
220-
if copy:
221-
result = result.copy()
222-
return result
223-
224214
return astype_overflowsafe(arr, DT64NS_DTYPE, copy=copy)
225215

226216

@@ -239,29 +229,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
239229
"""
240230
assert arr.dtype.kind == "m", arr.dtype
241231

242-
if arr.dtype == TD64NS_DTYPE:
243-
return arr.copy() if copy else arr
244-
245-
# Re-use the datetime64 machinery to do an overflow-safe `astype`
246-
dtype = arr.dtype.str.replace("m8", "M8")
247-
dummy = arr.view(dtype)
248-
try:
249-
dt64_result = ensure_datetime64ns(dummy, copy)
250-
except OutOfBoundsDatetime as err:
251-
# Re-write the exception in terms of timedelta64 instead of dt64
252-
253-
# Find the value that we are going to report as causing an overflow
254-
tdmin = arr.min()
255-
tdmax = arr.max()
256-
if np.abs(tdmin) >= np.abs(tdmax):
257-
bad_val = tdmin
258-
else:
259-
bad_val = tdmax
260-
261-
msg = f"Out of bounds for nanosecond {arr.dtype.name} {str(bad_val)}"
262-
raise OutOfBoundsTimedelta(msg)
263-
264-
return dt64_result.view(TD64NS_DTYPE)
232+
return astype_overflowsafe(arr, dtype=TD64NS_DTYPE, copy=copy)
265233

266234

267235
# ----------------------------------------------------------------------

pandas/_libs/tslibs/np_datetime.pyx

+38 -8
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ from cpython.object cimport (
1919

2020
import_datetime()
2121

22+
import numpy as np
2223
cimport numpy as cnp
2324

2425
cnp.import_array()
@@ -288,13 +289,21 @@ cpdef ndarray astype_overflowsafe(
288289
bint copy=True,
289290
):
290291
"""
291-
Convert an ndarray with datetime64[X] to datetime64[Y], raising on overflow.
292+
Convert an ndarray with datetime64[X] to datetime64[Y]
293+
or timedelta64[X] to timedelta64[Y],
294+
raising on overflow.
292295
"""
293-
if values.descr.type_num != cnp.NPY_DATETIME:
294-
# aka values.dtype.kind != "M"
295-
raise TypeError("astype_overflowsafe values must have datetime64 dtype")
296-
if dtype.type_num != cnp.NPY_DATETIME:
297-
raise TypeError("astype_overflowsafe dtype must be datetime64")
296+
if values.descr.type_num == dtype.type_num == cnp.NPY_DATETIME:
297+
# i.e. dtype.kind == "M"
298+
pass
299+
elif values.descr.type_num == dtype.type_num == cnp.NPY_TIMEDELTA:
300+
# i.e. dtype.kind == "m"
301+
pass
302+
else:
303+
raise TypeError(
304+
"astype_overflowsafe values.dtype and dtype must be either "
305+
"both-datetime64 or both-timedelta64."
306+
)
298307

299308
cdef:
300309
NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype)
@@ -306,14 +315,21 @@ cpdef ndarray astype_overflowsafe(
306315
):
307316
# without raising explicitly here, we end up with a SystemError
308317
# built-in function [...] returned a result with an error
309-
raise ValueError("datetime64 values and dtype must have a unit specified")
318+
raise ValueError(
319+
"datetime64/timedelta64 values and dtype must have a unit specified"
320+
)
310321

311322
if from_unit == to_unit:
312323
# Check this before allocating result for perf, might save some memory
313324
if copy:
314325
return values.copy()
315326
return values
316327

328+
elif from_unit > to_unit:
329+
# e.g. ns -> us, so there is no risk of overflow, so we can use
330+
# numpy's astype safely. Note there _is_ risk of truncation.
331+
return values.astype(dtype)
332+
317333
cdef:
318334
ndarray i8values = values.view("i8")
319335

@@ -326,6 +342,7 @@ cpdef ndarray astype_overflowsafe(
326342
Py_ssize_t i, N = values.size
327343
int64_t value, new_value
328344
npy_datetimestruct dts
345+
bint is_td = dtype.type_num == cnp.NPY_TIMEDELTA
329346

330347
for i in range(N):
331348
# Analogous to: item = values[i]
@@ -335,7 +352,20 @@ cpdef ndarray astype_overflowsafe(
335352
new_value = NPY_DATETIME_NAT
336353
else:
337354
pandas_datetime_to_datetimestruct(value, from_unit, &dts)
338-
check_dts_bounds(&dts, to_unit)
355+
356+
try:
357+
check_dts_bounds(&dts, to_unit)
358+
except OutOfBoundsDatetime as err:
359+
if is_td:
360+
tdval = np.timedelta64(value).view(values.dtype)
361+
msg = (
362+
"Cannot convert {tdval} to {dtype} without overflow"
363+
.format(tdval=str(tdval), dtype=str(dtype))
364+
)
365+
raise OutOfBoundsTimedelta(msg) from err
366+
else:
367+
raise
368+
339369
new_value = npy_datetimestruct_to_datetime(to_unit, &dts)
340370

341371
# Analogous to: iresult[i] = new_value

pandas/core/arrays/period.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
NaT,
2020
NaTType,
2121
Timedelta,
22+
astype_overflowsafe,
2223
delta_to_nanoseconds,
2324
dt64arr_to_periodarr as c_dt64arr_to_periodarr,
2425
iNaT,
@@ -864,11 +865,10 @@ def _check_timedeltalike_freq_compat(self, other):
864865
elif isinstance(other, np.ndarray):
865866
# numpy timedelta64 array; all entries must be compatible
866867
assert other.dtype.kind == "m"
867-
if other.dtype != TD64NS_DTYPE:
868-
# i.e. non-nano unit
869-
# TODO: disallow unit-less timedelta64
870-
other = other.astype(TD64NS_DTYPE)
871-
nanos = other.view("i8")
868+
other = astype_overflowsafe(other, TD64NS_DTYPE, copy=False)
869+
# error: Incompatible types in assignment (expression has type
870+
# "ndarray[Any, dtype[Any]]", variable has type "int")
871+
nanos = other.view("i8") # type: ignore[assignment]
872872
else:
873873
# TimedeltaArray/Index
874874
nanos = other.asi8

pandas/tests/tools/test_to_timedelta.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,10 @@ def test_to_timedelta_units_dtypes(self, dtype, unit):
7474
def test_to_timedelta_oob_non_nano(self):
7575
arr = np.array([pd.NaT.value + 1], dtype="timedelta64[s]")
7676

77-
msg = r"Out of bounds for nanosecond timedelta64\[s\] -9223372036854775807"
77+
msg = (
78+
"Cannot convert -9223372036854775807 seconds to "
79+
r"timedelta64\[ns\] without overflow"
80+
)
7881
with pytest.raises(OutOfBoundsTimedelta, match=msg):
7982
to_timedelta(arr)
8083

pandas/tests/tslibs/test_api.py

+1
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ def test_namespace():
5151
"to_offset",
5252
"tz_compare",
5353
"is_unitless",
54+
"astype_overflowsafe",
5455
"get_unit_from_dtype",
5556
"periods_per_day",
5657
]

pandas/tests/tslibs/test_conversion.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,13 @@ def test_tz_convert_readonly():
107107
def test_length_zero_copy(dtype, copy):
108108
arr = np.array([], dtype=dtype)
109109
result = conversion.ensure_datetime64ns(arr, copy=copy)
110-
assert result.base is (None if copy else arr)
110+
if copy:
111+
assert not np.shares_memory(result, arr)
112+
else:
113+
if arr.dtype == result.dtype:
114+
assert result is arr
115+
else:
116+
assert not np.shares_memory(result, arr)
111117

112118

113119
def test_ensure_datetime64ns_bigendian():
@@ -121,7 +127,7 @@ def test_ensure_datetime64ns_bigendian():
121127

122128
def test_ensure_timedelta64ns_overflows():
123129
arr = np.arange(10).astype("m8[Y]") * 100
124-
msg = r"Out of bounds for nanosecond timedelta64\[Y\] 900"
130+
msg = r"Cannot convert 300 years to timedelta64\[ns\] without overflow"
125131
with pytest.raises(OutOfBoundsTimedelta, match=msg):
126132
conversion.ensure_timedelta64ns(arr)
127133

pandas/tests/tslibs/test_np_datetime.py

+32 -3
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
from pandas._libs.tslibs.np_datetime import (
55
OutOfBoundsDatetime,
6+
OutOfBoundsTimedelta,
67
astype_overflowsafe,
78
is_unitless,
89
py_get_unit_from_dtype,
@@ -139,7 +140,10 @@ def test_pass_non_dt64_array(self):
139140
arr = np.arange(5)
140141
dtype = np.dtype("M8[ns]")
141142

142-
msg = "astype_overflowsafe values must have datetime64 dtype"
143+
msg = (
144+
"astype_overflowsafe values.dtype and dtype must be either "
145+
"both-datetime64 or both-timedelta64"
146+
)
143147
with pytest.raises(TypeError, match=msg):
144148
astype_overflowsafe(arr, dtype, copy=True)
145149

@@ -151,14 +155,17 @@ def test_pass_non_dt64_dtype(self):
151155
arr = np.arange(5, dtype="i8").view("M8[D]")
152156
dtype = np.dtype("m8[ns]")
153157

154-
msg = "astype_overflowsafe dtype must be datetime64"
158+
msg = (
159+
"astype_overflowsafe values.dtype and dtype must be either "
160+
"both-datetime64 or both-timedelta64"
161+
)
155162
with pytest.raises(TypeError, match=msg):
156163
astype_overflowsafe(arr, dtype, copy=True)
157164

158165
with pytest.raises(TypeError, match=msg):
159166
astype_overflowsafe(arr, dtype, copy=False)
160167

161-
def test_astype_overflowsafe(self):
168+
def test_astype_overflowsafe_dt64(self):
162169
dtype = np.dtype("M8[ns]")
163170

164171
dt = np.datetime64("2262-04-05", "D")
@@ -178,3 +185,25 @@ def test_astype_overflowsafe(self):
178185
result = astype_overflowsafe(arr, dtype2)
179186
expected = arr.astype(dtype2)
180187
tm.assert_numpy_array_equal(result, expected)
188+
189+
def test_astype_overflowsafe_td64(self):
190+
dtype = np.dtype("m8[ns]")
191+
192+
dt = np.datetime64("2262-04-05", "D")
193+
arr = dt + np.arange(10, dtype="m8[D]")
194+
arr = arr.view("m8[D]")
195+
196+
# arr.astype silently overflows, so this
197+
wrong = arr.astype(dtype)
198+
roundtrip = wrong.astype(arr.dtype)
199+
assert not (wrong == roundtrip).all()
200+
201+
msg = r"Cannot convert 106752 days to timedelta64\[ns\] without overflow"
202+
with pytest.raises(OutOfBoundsTimedelta, match=msg):
203+
astype_overflowsafe(arr, dtype)
204+
205+
# But converting to microseconds is fine, and we match numpy's results.
206+
dtype2 = np.dtype("m8[us]")
207+
result = astype_overflowsafe(arr, dtype2)
208+
expected = arr.astype(dtype2)
209+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)