Skip to content

Commit 5b525b4

Browse files
mroeschke, jbrockmendel, WillAyd, lithomas1
authored
BUG: dt.days for timedelta non-nano overflows int32 (#52391)
* BUG: dt.days for timedelta non-nano overflows int32 * Run precommit * lint * Address code check failures * Add whatsnew * PERF: numpy dtype checks (#52582) * CLN: Use #pragma once instead of include guards (#52635) Use #pragma once * Refactored custom datetime functions (#52634) refactored custom datetime functions * BLD: Add DLL hashes to RECORD (#52556) * CI: Remove ArrayManager job (#52637) * DOC: Remove notes to old Python/package versions (#52640) * STYLE sort whatsnew entries alphabeticaly, allow for trailing full stops (#52598) * allow for trailing full stops in sort-whatsnew-entries hook * sort alphabetically instead --------- Co-authored-by: MarcoGorelli <> * Fix redundant entries * remove redundant entries --------- Co-authored-by: jbrockmendel <[email protected]> Co-authored-by: William Ayd <[email protected]> Co-authored-by: Thomas Li <[email protected]>
1 parent f960825 commit 5b525b4

File tree

8 files changed

+69
-18
lines changed

8 files changed

+69
-18
lines changed

asv_bench/benchmarks/tslibs/fields.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
class TimeGetTimedeltaField:
1313
params = [
1414
_sizes,
15-
["days", "seconds", "microseconds", "nanoseconds"],
15+
["seconds", "microseconds", "nanoseconds"],
1616
]
1717
param_names = ["size", "field"]
1818

doc/source/whatsnew/v2.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424

2525
Bug fixes
2626
~~~~~~~~~
27+
- Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`)
2728
- Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`)
2829
- Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`)
2930
- Bug in :func:`read_csv` casting PyArrow datetimes to NumPy when ``dtype_backend="pyarrow"`` and ``parse_dates`` is set causing a performance bottleneck in the process (:issue:`52546`)

pandas/_libs/tslibs/fields.pyi

+4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ def get_timedelta_field(
3030
field: str,
3131
reso: int = ..., # NPY_DATETIMEUNIT
3232
) -> npt.NDArray[np.int32]: ...
33+
def get_timedelta_days(
34+
tdindex: npt.NDArray[np.int64], # const int64_t[:]
35+
reso: int = ..., # NPY_DATETIMEUNIT
36+
) -> npt.NDArray[np.int64]: ...
3337
def isleapyear_arr(
3438
years: np.ndarray,
3539
) -> npt.NDArray[np.bool_]: ...

pandas/_libs/tslibs/fields.pyx

+29-12
Original file line numberDiff line numberDiff line change
@@ -512,18 +512,7 @@ def get_timedelta_field(
512512

513513
out = np.empty(count, dtype="i4")
514514

515-
if field == "days":
516-
with nogil:
517-
for i in range(count):
518-
if tdindex[i] == NPY_NAT:
519-
out[i] = -1
520-
continue
521-
522-
pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds)
523-
out[i] = tds.days
524-
return out
525-
526-
elif field == "seconds":
515+
if field == "seconds":
527516
with nogil:
528517
for i in range(count):
529518
if tdindex[i] == NPY_NAT:
@@ -559,6 +548,34 @@ def get_timedelta_field(
559548
raise ValueError(f"Field {field} not supported")
560549

561550

551+
@cython.wraparound(False)
552+
@cython.boundscheck(False)
553+
def get_timedelta_days(
554+
const int64_t[:] tdindex,
555+
NPY_DATETIMEUNIT reso=NPY_FR_ns,
556+
):
557+
"""
558+
Given an int64-based timedelta index, extract the days
559+
field and return an array of these values.
560+
"""
561+
cdef:
562+
Py_ssize_t i, count = len(tdindex)
563+
ndarray[int64_t] out
564+
pandas_timedeltastruct tds
565+
566+
out = np.empty(count, dtype="i8")
567+
568+
with nogil:
569+
for i in range(count):
570+
if tdindex[i] == NPY_NAT:
571+
out[i] = -1
572+
continue
573+
574+
pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds)
575+
out[i] = tds.days
576+
return out
577+
578+
562579
cpdef isleapyear_arr(ndarray years):
563580
"""vectorized version of isleapyear; NaT evaluates as False"""
564581
cdef:

pandas/core/arrays/timedeltas.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@
3030
to_offset,
3131
)
3232
from pandas._libs.tslibs.conversion import precision_from_unit
33-
from pandas._libs.tslibs.fields import get_timedelta_field
33+
from pandas._libs.tslibs.fields import (
34+
get_timedelta_days,
35+
get_timedelta_field,
36+
)
3437
from pandas._libs.tslibs.timedeltas import (
3538
array_to_timedelta64,
3639
floordiv_object_array,
@@ -81,7 +84,13 @@
8184
def _field_accessor(name: str, alias: str, docstring: str):
8285
def f(self) -> np.ndarray:
8386
values = self.asi8
84-
result = get_timedelta_field(values, alias, reso=self._creso)
87+
if alias == "days":
88+
result = get_timedelta_days(values, reso=self._creso)
89+
else:
90+
# error: Incompatible types in assignment (
91+
# expression has type "ndarray[Any, dtype[signedinteger[_32Bit]]]",
92+
# variable has type "ndarray[Any, dtype[signedinteger[_64Bit]]]")
93+
result = get_timedelta_field(values, alias, reso=self._creso) # type: ignore[assignment] # noqa: E501
8594
if self._hasna:
8695
result = self._maybe_mask_results(
8796
result, fill_value=None, convert="float64"

pandas/tests/indexes/timedeltas/test_timedelta.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_pass_TimedeltaIndex_to_index(self):
6767

6868
def test_fields(self):
6969
rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s")
70-
tm.assert_index_equal(rng.days, Index([1, 1], dtype=np.int32))
70+
tm.assert_index_equal(rng.days, Index([1, 1], dtype=np.int64))
7171
tm.assert_index_equal(
7272
rng.seconds,
7373
Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype=np.int32),

pandas/tests/series/accessors/test_dt_accessor.py

+20
Original file line numberDiff line numberDiff line change
@@ -796,3 +796,23 @@ def test_normalize_pre_epoch_dates():
796796
result = ser.dt.normalize()
797797
expected = pd.to_datetime(Series(["1969-01-01", "2016-01-01"]))
798798
tm.assert_series_equal(result, expected)
799+
800+
801+
def test_day_attribute_non_nano_beyond_int32():
802+
# GH 52386
803+
data = np.array(
804+
[
805+
136457654736252,
806+
134736784364431,
807+
245345345545332,
808+
223432411,
809+
2343241,
810+
3634548734,
811+
23234,
812+
],
813+
dtype="timedelta64[s]",
814+
)
815+
ser = Series(data)
816+
result = ser.dt.days
817+
expected = Series([1579371003, 1559453522, 2839645203, 2586, 27, 42066, 0])
818+
tm.assert_series_equal(result, expected)

pandas/tests/tslibs/test_fields.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,6 @@ def test_get_start_end_field_readonly(dtindex):
3535

3636
def test_get_timedelta_field_readonly(dtindex):
3737
# treat dtindex as timedeltas for this next one
38-
result = fields.get_timedelta_field(dtindex, "days")
39-
expected = np.arange(5, dtype=np.int32) * 32
38+
result = fields.get_timedelta_field(dtindex, "seconds")
39+
expected = np.array([0] * 5, dtype=np.int32)
4040
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)