Skip to content

Commit 926485a

Browse files
mroeschke authored and meeseeksmachine committed
Backport PR pandas-dev#52391: BUG: dt.days for timedelta non-nano overflows int32
1 parent bbcc0ec commit 926485a

File tree

8 files changed

+69
-18
lines changed

8 files changed

+69
-18
lines changed

asv_bench/benchmarks/tslibs/fields.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
class TimeGetTimedeltaField:
1313
params = [
1414
_sizes,
15-
["days", "seconds", "microseconds", "nanoseconds"],
15+
["seconds", "microseconds", "nanoseconds"],
1616
]
1717
param_names = ["size", "field"]
1818

doc/source/whatsnew/v2.0.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ Fixed regressions
2424

2525
Bug fixes
2626
~~~~~~~~~
27+
- Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`)
2728
- Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`)
2829
- Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`)
2930
- Bug in :func:`read_csv` casting PyArrow datetimes to NumPy when ``dtype_backend="pyarrow"`` and ``parse_dates`` is set causing a performance bottleneck in the process (:issue:`52546`)

pandas/_libs/tslibs/fields.pyi

+4
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,10 @@ def get_timedelta_field(
3030
field: str,
3131
reso: int = ..., # NPY_DATETIMEUNIT
3232
) -> npt.NDArray[np.int32]: ...
33+
def get_timedelta_days(
34+
tdindex: npt.NDArray[np.int64], # const int64_t[:]
35+
reso: int = ..., # NPY_DATETIMEUNIT
36+
) -> npt.NDArray[np.int64]: ...
3337
def isleapyear_arr(
3438
years: np.ndarray,
3539
) -> npt.NDArray[np.bool_]: ...

pandas/_libs/tslibs/fields.pyx

+29-12
Original file line number | Diff line number | Diff line change
@@ -508,18 +508,7 @@ def get_timedelta_field(
508508

509509
out = np.empty(count, dtype="i4")
510510

511-
if field == "days":
512-
with nogil:
513-
for i in range(count):
514-
if tdindex[i] == NPY_NAT:
515-
out[i] = -1
516-
continue
517-
518-
pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds)
519-
out[i] = tds.days
520-
return out
521-
522-
elif field == "seconds":
511+
if field == "seconds":
523512
with nogil:
524513
for i in range(count):
525514
if tdindex[i] == NPY_NAT:
@@ -555,6 +544,34 @@ def get_timedelta_field(
555544
raise ValueError(f"Field {field} not supported")
556545

557546

547+
@cython.wraparound(False)
548+
@cython.boundscheck(False)
549+
def get_timedelta_days(
550+
const int64_t[:] tdindex,
551+
NPY_DATETIMEUNIT reso=NPY_FR_ns,
552+
):
553+
"""
554+
Given a int64-based timedelta index, extract the days,
555+
field and return an array of these values.
556+
"""
557+
cdef:
558+
Py_ssize_t i, count = len(tdindex)
559+
ndarray[int64_t] out
560+
pandas_timedeltastruct tds
561+
562+
out = np.empty(count, dtype="i8")
563+
564+
with nogil:
565+
for i in range(count):
566+
if tdindex[i] == NPY_NAT:
567+
out[i] = -1
568+
continue
569+
570+
pandas_timedelta_to_timedeltastruct(tdindex[i], reso, &tds)
571+
out[i] = tds.days
572+
return out
573+
574+
558575
cpdef isleapyear_arr(ndarray years):
559576
"""vectorized version of isleapyear; NaT evaluates as False"""
560577
cdef:

pandas/core/arrays/timedeltas.py

+11-2
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,10 @@
3131
to_offset,
3232
)
3333
from pandas._libs.tslibs.conversion import precision_from_unit
34-
from pandas._libs.tslibs.fields import get_timedelta_field
34+
from pandas._libs.tslibs.fields import (
35+
get_timedelta_days,
36+
get_timedelta_field,
37+
)
3538
from pandas._libs.tslibs.timedeltas import (
3639
array_to_timedelta64,
3740
floordiv_object_array,
@@ -78,7 +81,13 @@
7881
def _field_accessor(name: str, alias: str, docstring: str):
7982
def f(self) -> np.ndarray:
8083
values = self.asi8
81-
result = get_timedelta_field(values, alias, reso=self._creso)
84+
if alias == "days":
85+
result = get_timedelta_days(values, reso=self._creso)
86+
else:
87+
# error: Incompatible types in assignment (
88+
# expression has type "ndarray[Any, dtype[signedinteger[_32Bit]]]",
89+
# variable has type "ndarray[Any, dtype[signedinteger[_64Bit]]]
90+
result = get_timedelta_field(values, alias, reso=self._creso) # type: ignore[assignment] # noqa: E501
8291
if self._hasna:
8392
result = self._maybe_mask_results(
8493
result, fill_value=None, convert="float64"

pandas/tests/indexes/timedeltas/test_timedelta.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def test_pass_TimedeltaIndex_to_index(self):
6767

6868
def test_fields(self):
6969
rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s")
70-
tm.assert_index_equal(rng.days, Index([1, 1], dtype=np.int32))
70+
tm.assert_index_equal(rng.days, Index([1, 1], dtype=np.int64))
7171
tm.assert_index_equal(
7272
rng.seconds,
7373
Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype=np.int32),

pandas/tests/series/accessors/test_dt_accessor.py

+20
Original file line number | Diff line number | Diff line change
@@ -792,3 +792,23 @@ def test_normalize_pre_epoch_dates():
792792
result = ser.dt.normalize()
793793
expected = pd.to_datetime(Series(["1969-01-01", "2016-01-01"]))
794794
tm.assert_series_equal(result, expected)
795+
796+
797+
def test_day_attribute_non_nano_beyond_int32():
798+
# GH 52386
799+
data = np.array(
800+
[
801+
136457654736252,
802+
134736784364431,
803+
245345345545332,
804+
223432411,
805+
2343241,
806+
3634548734,
807+
23234,
808+
],
809+
dtype="timedelta64[s]",
810+
)
811+
ser = Series(data)
812+
result = ser.dt.days
813+
expected = Series([1579371003, 1559453522, 2839645203, 2586, 27, 42066, 0])
814+
tm.assert_series_equal(result, expected)

pandas/tests/tslibs/test_fields.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,6 @@ def test_get_start_end_field_readonly(dtindex):
3535

3636
def test_get_timedelta_field_readonly(dtindex):
3737
# treat dtindex as timedeltas for this next one
38-
result = fields.get_timedelta_field(dtindex, "days")
39-
expected = np.arange(5, dtype=np.int32) * 32
38+
result = fields.get_timedelta_field(dtindex, "seconds")
39+
expected = np.array([0] * 5, dtype=np.int32)
4040
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)