Skip to content

Commit 9f5c8b9

Browse files
authored
BUG: PeriodIndex + TimedeltaArray-with-NaT (#47783)
* BUG: PeriodIndex-with-Nat + TimedeltaArray * mypy fixup * de-kludge
1 parent 9bf1e96 commit 9f5c8b9

File tree

11 files changed

+126
-25
lines changed

11 files changed

+126
-25
lines changed

pandas/_libs/tslibs/dtypes.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ from numpy cimport int64_t
33
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
44

55

6-
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
6+
cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
77
cdef NPY_DATETIMEUNIT abbrev_to_npy_unit(str abbrev)
88
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
99
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1

pandas/_libs/tslibs/dtypes.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ _period_code_map: dict[str, int]
88
def periods_per_day(reso: int) -> int: ...
99
def periods_per_second(reso: int) -> int: ...
1010
def is_supported_unit(reso: int) -> bool: ...
11+
def npy_unit_to_abbrev(reso: int) -> str: ...
1112

1213
class PeriodDtypeBase:
1314
_dtype_code: int # PeriodDtypeCode

pandas/_libs/tslibs/dtypes.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def is_supported_unit(NPY_DATETIMEUNIT reso):
289289
)
290290

291291

292-
cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
292+
cpdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
293293
if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
294294
# generic -> default to nanoseconds
295295
return "ns"

pandas/_libs/tslibs/np_datetime.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ cpdef cnp.ndarray astype_overflowsafe(
101101
cnp.ndarray values, # ndarray[datetime64[anyunit]]
102102
cnp.dtype dtype, # target dtype: datetime64[anyunit] / timedelta64[anyunit]
103103
bint copy=*,
104+
bint round_ok=*,
104105
)
105106
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1
106107

pandas/_libs/tslibs/np_datetime.pyi

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@ class OutOfBoundsTimedelta(ValueError): ...
99
def py_get_unit_from_dtype(dtype: np.dtype): ...
1010
def py_td64_to_tdstruct(td64: int, unit: int) -> dict: ...
1111
def astype_overflowsafe(
12-
arr: np.ndarray, dtype: np.dtype, copy: bool = ...
12+
arr: np.ndarray,
13+
dtype: np.dtype,
14+
copy: bool = ...,
15+
round_ok: bool = ...,
1316
) -> np.ndarray: ...
1417
def is_unitless(dtype: np.dtype) -> bool: ...
1518
def compare_mismatched_resolutions(

pandas/_libs/tslibs/np_datetime.pyx

+62-10
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ cpdef ndarray astype_overflowsafe(
282282
ndarray values,
283283
cnp.dtype dtype,
284284
bint copy=True,
285+
bint round_ok=True,
285286
):
286287
"""
287288
Convert an ndarray with datetime64[X] to datetime64[Y]
@@ -314,20 +315,24 @@ cpdef ndarray astype_overflowsafe(
314315
"datetime64/timedelta64 values and dtype must have a unit specified"
315316
)
316317

317-
if (<object>values).dtype.byteorder == ">":
318-
# GH#29684 we incorrectly get OutOfBoundsDatetime if we don't swap
319-
values = values.astype(values.dtype.newbyteorder("<"))
320-
321318
if from_unit == to_unit:
322319
# Check this before allocating result for perf, might save some memory
323320
if copy:
324321
return values.copy()
325322
return values
326323

327324
elif from_unit > to_unit:
328-
# e.g. ns -> us, so there is no risk of overflow, so we can use
329-
# numpy's astype safely. Note there _is_ risk of truncation.
330-
return values.astype(dtype)
325+
if round_ok:
326+
# e.g. ns -> us, so there is no risk of overflow, so we can use
327+
# numpy's astype safely. Note there _is_ risk of truncation.
328+
return values.astype(dtype)
329+
else:
330+
iresult2 = astype_round_check(values.view("i8"), from_unit, to_unit)
331+
return iresult2.view(dtype)
332+
333+
if (<object>values).dtype.byteorder == ">":
334+
# GH#29684 we incorrectly get OutOfBoundsDatetime if we don't swap
335+
values = values.astype(values.dtype.newbyteorder("<"))
331336

332337
cdef:
333338
ndarray i8values = values.view("i8")
@@ -356,10 +361,11 @@ cpdef ndarray astype_overflowsafe(
356361
check_dts_bounds(&dts, to_unit)
357362
except OutOfBoundsDatetime as err:
358363
if is_td:
359-
tdval = np.timedelta64(value).view(values.dtype)
364+
from_abbrev = np.datetime_data(values.dtype)[0]
365+
np_val = np.timedelta64(value, from_abbrev)
360366
msg = (
361-
"Cannot convert {tdval} to {dtype} without overflow"
362-
.format(tdval=str(tdval), dtype=str(dtype))
367+
"Cannot convert {np_val} to {dtype} without overflow"
368+
.format(np_val=str(np_val), dtype=str(dtype))
363369
)
364370
raise OutOfBoundsTimedelta(msg) from err
365371
else:
@@ -453,6 +459,52 @@ cdef int op_to_op_code(op):
453459
return Py_GT
454460

455461

462+
cdef ndarray astype_round_check(
463+
ndarray i8values,
464+
NPY_DATETIMEUNIT from_unit,
465+
NPY_DATETIMEUNIT to_unit
466+
):
467+
# cases with from_unit > to_unit, e.g. ns->us, raise if the conversion
468+
# involves truncation, e.g. 1500ns->1us
469+
cdef:
470+
Py_ssize_t i, N = i8values.size
471+
472+
# equiv: iresult = np.empty((<object>i8values).shape, dtype="i8")
473+
ndarray iresult = cnp.PyArray_EMPTY(
474+
i8values.ndim, i8values.shape, cnp.NPY_INT64, 0
475+
)
476+
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(iresult, i8values)
477+
478+
# Note the arguments to_unit, from_unit are swapped vs how they
479+
# are passed when going to a higher-frequency reso.
480+
int64_t mult = get_conversion_factor(to_unit, from_unit)
481+
int64_t value, mod
482+
483+
for i in range(N):
484+
# Analogous to: item = i8values[i]
485+
value = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
486+
487+
if value == NPY_DATETIME_NAT:
488+
new_value = NPY_DATETIME_NAT
489+
else:
490+
new_value, mod = divmod(value, mult)
491+
if mod != 0:
492+
# TODO: avoid runtime import
493+
from pandas._libs.tslibs.dtypes import npy_unit_to_abbrev
494+
from_abbrev = npy_unit_to_abbrev(from_unit)
495+
to_abbrev = npy_unit_to_abbrev(to_unit)
496+
raise ValueError(
497+
f"Cannot losslessly cast '{value} {from_abbrev}' to {to_abbrev}"
498+
)
499+
500+
# Analogous to: iresult[i] = new_value
501+
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = new_value
502+
503+
cnp.PyArray_MultiIter_NEXT(mi)
504+
505+
return iresult
506+
507+
456508
@cython.overflowcheck(True)
457509
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1:
458510
"""

pandas/_libs/tslibs/offsets.pyi

+2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ def to_offset(freq: timedelta | str) -> BaseOffset: ...
111111

112112
class Tick(SingleConstructorOffset):
113113
_reso: int
114+
_prefix: str
115+
_td64_unit: str
114116
def __init__(self, n: int = ..., normalize: bool = ...) -> None: ...
115117
@property
116118
def delta(self) -> Timedelta: ...

pandas/_libs/tslibs/offsets.pyx

+8
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,7 @@ cdef class SingleConstructorOffset(BaseOffset):
796796
cdef class Tick(SingleConstructorOffset):
797797
_adjust_dst = False
798798
_prefix = "undefined"
799+
_td64_unit = "undefined"
799800
_attributes = tuple(["n", "normalize"])
800801

801802
def __init__(self, n=1, normalize=False):
@@ -968,48 +969,55 @@ cdef class Tick(SingleConstructorOffset):
968969
cdef class Day(Tick):
969970
_nanos_inc = 24 * 3600 * 1_000_000_000
970971
_prefix = "D"
972+
_td64_unit = "D"
971973
_period_dtype_code = PeriodDtypeCode.D
972974
_reso = NPY_DATETIMEUNIT.NPY_FR_D
973975

974976

975977
cdef class Hour(Tick):
976978
_nanos_inc = 3600 * 1_000_000_000
977979
_prefix = "H"
980+
_td64_unit = "h"
978981
_period_dtype_code = PeriodDtypeCode.H
979982
_reso = NPY_DATETIMEUNIT.NPY_FR_h
980983

981984

982985
cdef class Minute(Tick):
983986
_nanos_inc = 60 * 1_000_000_000
984987
_prefix = "T"
988+
_td64_unit = "m"
985989
_period_dtype_code = PeriodDtypeCode.T
986990
_reso = NPY_DATETIMEUNIT.NPY_FR_m
987991

988992

989993
cdef class Second(Tick):
990994
_nanos_inc = 1_000_000_000
991995
_prefix = "S"
996+
_td64_unit = "s"
992997
_period_dtype_code = PeriodDtypeCode.S
993998
_reso = NPY_DATETIMEUNIT.NPY_FR_s
994999

9951000

9961001
cdef class Milli(Tick):
9971002
_nanos_inc = 1_000_000
9981003
_prefix = "L"
1004+
_td64_unit = "ms"
9991005
_period_dtype_code = PeriodDtypeCode.L
10001006
_reso = NPY_DATETIMEUNIT.NPY_FR_ms
10011007

10021008

10031009
cdef class Micro(Tick):
10041010
_nanos_inc = 1000
10051011
_prefix = "U"
1012+
_td64_unit = "us"
10061013
_period_dtype_code = PeriodDtypeCode.U
10071014
_reso = NPY_DATETIMEUNIT.NPY_FR_us
10081015

10091016

10101017
cdef class Nano(Tick):
10111018
_nanos_inc = 1
10121019
_prefix = "N"
1020+
_td64_unit = "ns"
10131021
_period_dtype_code = PeriodDtypeCode.N
10141022
_reso = NPY_DATETIMEUNIT.NPY_FR_ns
10151023

pandas/core/arrays/period.py

+19-12
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,7 @@
7272
ABCSeries,
7373
ABCTimedeltaArray,
7474
)
75-
from pandas.core.dtypes.missing import (
76-
isna,
77-
notna,
78-
)
75+
from pandas.core.dtypes.missing import notna
7976

8077
import pandas.core.algorithms as algos
8178
from pandas.core.arrays import datetimelike as dtl
@@ -792,20 +789,30 @@ def _add_timedelta_arraylike(
792789
-------
793790
result : ndarray[int64]
794791
"""
795-
if not isinstance(self.freq, Tick):
792+
freq = self.freq
793+
if not isinstance(freq, Tick):
796794
# We cannot add timedelta-like to non-tick PeriodArray
797795
raise TypeError(
798796
f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"
799797
)
800798

801-
if not np.all(isna(other)):
802-
delta = self._check_timedeltalike_freq_compat(other)
803-
else:
804-
# all-NaT TimedeltaIndex is equivalent to a single scalar td64 NaT
805-
return self + np.timedelta64("NaT")
799+
dtype = np.dtype(f"m8[{freq._td64_unit}]")
800+
801+
try:
802+
delta = astype_overflowsafe(
803+
np.asarray(other), dtype=dtype, copy=False, round_ok=False
804+
)
805+
except ValueError as err:
806+
# TODO: not actually a great exception message in this case
807+
raise raise_on_incompatible(self, other) from err
808+
809+
b_mask = np.isnat(delta)
806810

807-
ordinals = self._addsub_int_array_or_scalar(delta, operator.add).asi8
808-
return type(self)(ordinals, dtype=self.dtype)
811+
res_values = algos.checked_add_with_arr(
812+
self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
813+
)
814+
np.putmask(res_values, self._isnan | b_mask, iNaT)
815+
return type(self)(res_values, freq=self.freq)
809816

810817
def _check_timedeltalike_freq_compat(self, other):
811818
"""

pandas/tests/arithmetic/test_period.py

+15
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,21 @@ def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other):
12431243
with pytest.raises(TypeError, match=msg):
12441244
other - obj
12451245

1246+
# some but not *all* NaT
1247+
other = other.copy()
1248+
other[0] = np.timedelta64(0, "ns")
1249+
expected = PeriodIndex([pi[0]] + ["NaT"] * 8, freq="19D")
1250+
expected = tm.box_expected(expected, box_with_array)
1251+
1252+
result = obj + other
1253+
tm.assert_equal(result, expected)
1254+
result = other + obj
1255+
tm.assert_equal(result, expected)
1256+
result = obj - other
1257+
tm.assert_equal(result, expected)
1258+
with pytest.raises(TypeError, match=msg):
1259+
other - obj
1260+
12461261
# ---------------------------------------------------------------
12471262
# Unsorted
12481263

pandas/tests/tslibs/test_np_datetime.py

+12
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,15 @@ def test_astype_overflowsafe_td64(self):
208208
result = astype_overflowsafe(arr, dtype2)
209209
expected = arr.astype(dtype2)
210210
tm.assert_numpy_array_equal(result, expected)
211+
212+
def test_astype_overflowsafe_disallow_rounding(self):
213+
arr = np.array([-1500, 1500], dtype="M8[ns]")
214+
dtype = np.dtype("M8[us]")
215+
216+
msg = "Cannot losslessly cast '-1500 ns' to us"
217+
with pytest.raises(ValueError, match=msg):
218+
astype_overflowsafe(arr, dtype, round_ok=False)
219+
220+
result = astype_overflowsafe(arr, dtype, round_ok=True)
221+
expected = arr.astype(dtype)
222+
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)