diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index a9b4ad2e5374a..1e2747d3bc463 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -374,6 +374,7 @@ Datetimelike
 - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`)
 - :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`)
 - Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`)
+- Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`)
 
 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 64e5f78d961d1..17fcd00b7b251 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -381,14 +381,12 @@ def sum(
         nv.validate_sum(
             (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
         )
-        if not self.size and (self.ndim == 1 or axis is None):
-            return NaT
 
         result = nanops.nansum(
-            self._data, axis=axis, skipna=skipna, min_count=min_count
+            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
         )
-        if is_scalar(result):
-            return Timedelta(result)
+        if axis is None or self.ndim == 1:
+            return self._box_func(result)
         return self._from_backing_data(result)
 
     def std(
@@ -403,13 +401,11 @@ def std(
         nv.validate_stat_ddof_func(
             (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std"
         )
-        if not len(self):
-            return NaT
-        if not skipna and self._hasnans:
-            return NaT
 
-        result = nanops.nanstd(self._data, axis=axis, skipna=skipna, ddof=ddof)
-        return Timedelta(result)
+        result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
+        if axis is None or self.ndim == 1:
+            return self._box_func(result)
+        return self._from_backing_data(result)
 
     # ----------------------------------------------------------------
     # Rendering Methods
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 83399a87e5667..b101da196fdd8 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -228,7 +228,7 @@ def _maybe_get_mask(
             # Boolean data cannot contain nulls, so signal via mask being None
             return None
 
-        if skipna:
+        if skipna or needs_i8_conversion(values.dtype):
             mask = isna(values)
 
     return mask
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
index 976f5d0c90f19..f9584e29d47bc 100644
--- a/pandas/tests/arrays/test_timedeltas.py
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -2,6 +2,7 @@
 import pytest
 
 import pandas as pd
+from pandas import Timedelta
 import pandas._testing as tm
 from pandas.core.arrays import TimedeltaArray
 
@@ -175,7 +176,7 @@ def test_neg_freq(self):
 
 
 class TestReductions:
-    @pytest.mark.parametrize("name", ["sum", "std", "min", "max", "median"])
+    @pytest.mark.parametrize("name", ["std", "min", "max", "median"])
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reductions_empty(self, name, skipna):
         tdi = pd.TimedeltaIndex([])
@@ -187,6 +188,19 @@ def test_reductions_empty(self, name, skipna):
         result = getattr(arr, name)(skipna=skipna)
         assert result is pd.NaT
 
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_sum_empty(self, skipna):
+        tdi = pd.TimedeltaIndex([])
+        arr = tdi.array
+
+        result = tdi.sum(skipna=skipna)
+        assert isinstance(result, Timedelta)
+        assert result == Timedelta(0)
+
+        result = arr.sum(skipna=skipna)
+        assert isinstance(result, Timedelta)
+        assert result == Timedelta(0)
+
     def test_min_max(self):
         arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"])
 
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
index 0c946b2cbccc9..0e8bf8f052206 100644
--- a/pandas/tests/series/test_reductions.py
+++ b/pandas/tests/series/test_reductions.py
@@ -15,6 +15,16 @@ def test_reductions_td64_with_nat():
     assert ser.max() == exp
 
 
+@pytest.mark.parametrize("skipna", [True, False])
+def test_td64_sum_empty(skipna):
+    # GH#37151
+    ser = Series([], dtype="timedelta64[ns]")
+
+    result = ser.sum(skipna=skipna)
+    assert isinstance(result, pd.Timedelta)
+    assert result == pd.Timedelta(0)
+
+
 def test_td64_summation_overflow():
     # GH#9442
     ser = Series(pd.date_range("20130101", periods=100000, freq="H"))