Skip to content

Commit 5dc9afb

Browse files
jbrockmendel and proost
authored and committed
ENH: implement TimedeltaArray/TimedeltaIndex sum, median, std (pandas-dev#28165)
1 parent 9f58fc0 commit 5dc9afb

File tree

3 files changed

+155
-5
lines changed

3 files changed

+155
-5
lines changed

pandas/core/arrays/timedeltas.py

+58
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
precision_from_unit,
1515
)
1616
import pandas.compat as compat
17+
from pandas.compat.numpy import function as nv
1718
from pandas.util._decorators import Appender
1819

1920
from pandas.core.dtypes.common import (
@@ -41,6 +42,7 @@
4142
)
4243
from pandas.core.dtypes.missing import isna
4344

45+
from pandas.core import nanops
4446
from pandas.core.algorithms import checked_add_with_arr
4547
import pandas.core.common as com
4648
from pandas.core.ops.invalid import invalid_comparison
@@ -384,6 +386,62 @@ def astype(self, dtype, copy=True):
384386
return self
385387
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
386388

389+
def sum(
    self,
    axis=None,
    dtype=None,
    out=None,
    keepdims: bool = False,
    initial=None,
    skipna: bool = True,
    min_count: int = 0,
):
    """
    Return the sum of the stored timedeltas as a scalar.

    Parameters
    ----------
    axis : optional
        Forwarded unchanged to ``nanops.nansum``.
    dtype, out, keepdims, initial
        Only handed to ``nv.validate_sum`` for checking; they do not
        influence the computation performed here.
    skipna : bool, default True
        Forwarded to ``nanops.nansum``.  When False and the array reports
        missing values (``self._hasnans``), ``NaT`` is returned without
        calling ``nanops.nansum``.
    min_count : int, default 0
        Forwarded unchanged to ``nanops.nansum``.

    Returns
    -------
    Timedelta or NaT
        ``NaT`` for an empty array or for the ``skipna=False`` case
        described above; otherwise ``Timedelta`` applied to the
        ``nanops.nansum`` result.
    """
    numpy_only_kwargs = {
        "dtype": dtype,
        "out": out,
        "keepdims": keepdims,
        "initial": initial,
    }
    nv.validate_sum((), numpy_only_kwargs)

    # Both short-circuits yield NaT, so they share one guard; the emptiness
    # check still runs first, as before.
    if len(self) == 0 or (not skipna and self._hasnans):
        return NaT

    total = nanops.nansum(
        self._data, axis=axis, skipna=skipna, min_count=min_count
    )
    return Timedelta(total)
411+
412+
def std(
    self,
    axis=None,
    dtype=None,
    out=None,
    ddof: int = 1,
    keepdims: bool = False,
    skipna: bool = True,
):
    """
    Return the standard deviation of the stored timedeltas as a scalar.

    Parameters
    ----------
    axis : optional
        Forwarded unchanged to ``nanops.nanstd``.
    dtype, out, keepdims
        Only handed to ``nv.validate_stat_ddof_func`` for checking; they do
        not influence the computation performed here.
    ddof : int, default 1
        Forwarded unchanged to ``nanops.nanstd``.
    skipna : bool, default True
        Forwarded to ``nanops.nanstd``.  When False and the array reports
        missing values (``self._hasnans``), ``NaT`` is returned without
        calling ``nanops.nanstd``.

    Returns
    -------
    Timedelta or NaT
        ``NaT`` for an empty array or for the ``skipna=False`` case
        described above; otherwise ``Timedelta`` applied to the
        ``nanops.nanstd`` result.
    """
    numpy_only_kwargs = {"dtype": dtype, "out": out, "keepdims": keepdims}
    nv.validate_stat_ddof_func((), numpy_only_kwargs, fname="std")

    # Both short-circuits yield NaT, so they share one guard; the emptiness
    # check still runs first, as before.
    if len(self) == 0 or (not skipna and self._hasnans):
        return NaT

    spread = nanops.nanstd(self._data, axis=axis, skipna=skipna, ddof=ddof)
    return Timedelta(spread)
431+
432+
def median(
    self,
    axis=None,
    out=None,
    overwrite_input: bool = False,
    keepdims: bool = False,
    skipna: bool = True,
):
    """
    Return the median of the stored timedeltas as a scalar.

    Parameters
    ----------
    axis : optional
        Forwarded unchanged to ``nanops.nanmedian``.
    out, overwrite_input, keepdims
        Only handed to ``nv.validate_median`` for checking; they do not
        influence the computation performed here.
    skipna : bool, default True
        Forwarded to ``nanops.nanmedian``.  When False and the array
        reports missing values (``self._hasnans``), ``NaT`` is returned
        without calling ``nanops.nanmedian``.

    Returns
    -------
    Timedelta or NaT
        ``NaT`` for an empty array or for the ``skipna=False`` case
        described above; otherwise ``Timedelta`` applied to the
        ``nanops.nanmedian`` result.
    """
    nv.validate_median(
        (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
    )

    # Same short-circuits as ``sum`` and ``std``: the three reductions then
    # agree on empty and NaT-containing input without depending on how
    # nanops treats those cases.
    if not len(self):
        return NaT
    if not skipna and self._hasnans:
        return NaT

    result = nanops.nanmedian(self._data, axis=axis, skipna=skipna)
    # Box explicitly, as ``sum`` and ``std`` do, so the scalar type returned
    # here does not depend on what nanops hands back.
    return Timedelta(result)
444+
387445
# ----------------------------------------------------------------
388446
# Rendering Methods
389447

pandas/core/indexes/timedeltas.py

+4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pandas.core.indexes.datetimelike import (
3131
DatetimeIndexOpsMixin,
3232
DatetimelikeDelegateMixin,
33+
ea_passthrough,
3334
)
3435
from pandas.core.indexes.numeric import Int64Index
3536
from pandas.core.ops import get_op_result_name
@@ -173,6 +174,9 @@ def _join_i8_wrapper(joinf, **kwargs):
173174
_datetimelike_ops = TimedeltaArray._datetimelike_ops
174175
_datetimelike_methods = TimedeltaArray._datetimelike_methods
175176
_other_ops = TimedeltaArray._other_ops
177+
# Expose the TimedeltaArray reductions on the index under the same names.
# NOTE(review): ea_passthrough comes from pandas.core.indexes.datetimelike;
# presumably it forwards the call to the index's underlying array -- confirm
# against its definition.
sum = ea_passthrough(TimedeltaArray.sum)
178+
std = ea_passthrough(TimedeltaArray.std)
179+
median = ea_passthrough(TimedeltaArray.median)
176180

177181
# -------------------------------------------------------------------
178182
# Constructors

pandas/tests/arrays/test_timedeltas.py

+93-5
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,18 @@ def test_setitem_objects(self, obj):
143143

144144

145145
class TestReductions:
146+
@pytest.mark.parametrize("name", ["sum", "std", "min", "max", "median"])
@pytest.mark.parametrize("skipna", [True, False])
def test_reductions_empty(self, name, skipna):
    """Each listed reduction of an empty index / array must give ``pd.NaT``."""
    empty_index = pd.TimedeltaIndex([])

    # Index first, then its backing array -- same order as the checks
    # were originally written.
    for container in (empty_index, empty_index.array):
        reduction = getattr(container, name)
        assert reduction(skipna=skipna) is pd.NaT
157+
146158
def test_min_max(self):
147159
arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"])
148160

@@ -160,11 +172,87 @@ def test_min_max(self):
160172
result = arr.max(skipna=False)
161173
assert result is pd.NaT
162174

163-
@pytest.mark.parametrize("skipna", [True, False])
164-
def test_min_max_empty(self, skipna):
165-
arr = TimedeltaArray._from_sequence([])
166-
result = arr.min(skipna=skipna)
175+
def test_sum(self):
    """
    Check ``sum`` on a TimedeltaArray and on the TimedeltaIndex wrapping it.

    Covers the default NaT-skipping sum, NaT propagation with
    ``skipna=False``, and both sides of the ``min_count`` threshold.
    """
    # Lowercase "h": the uppercase "H" unit alias is deprecated in newer
    # pandas releases, while "h" is accepted by old and new versions alike.
    tdi = pd.TimedeltaIndex(["3h", "3h", "NaT", "2h", "5h", "4h"])
    arr = tdi.array
    # 3 + 3 + 2 + 5 + 4 hours; the NaT entry is skipped.
    expected = pd.Timedelta(hours=17)

    for obj in (arr, tdi):
        result = obj.sum(skipna=True)
        assert isinstance(result, pd.Timedelta)
        assert result == expected

    for obj in (arr, tdi):
        # Without skipping, the single NaT makes the whole sum NaT.
        result = obj.sum(skipna=False)
        assert result is pd.NaT

    for obj in (arr, tdi):
        # Only five valid entries exist, fewer than the nine required.
        result = obj.sum(min_count=9)
        assert result is pd.NaT

    for obj in (arr, tdi):
        result = obj.sum(min_count=1)
        assert isinstance(result, pd.Timedelta)
        assert result == expected
207+
208+
def test_npsum(self):
    """``np.sum`` on the index and on its array must return a ``pd.Timedelta``."""
    # GH#25335 np.sum should return a Timedelta, not timedelta64
    # Lowercase "h": the uppercase "H" unit alias is deprecated in newer
    # pandas releases, while "h" is accepted by old and new versions alike.
    tdi = pd.TimedeltaIndex(["3h", "3h", "2h", "5h", "4h"])
    arr = tdi.array
    expected = pd.Timedelta(hours=17)

    for obj in (tdi, arr):
        result = np.sum(obj)
        assert isinstance(result, pd.Timedelta)
        assert result == expected
221+
222+
def test_std(self):
    """
    Check ``std`` on a TimedeltaArray and on the TimedeltaIndex wrapping it.

    Covers the default NaT-skipping result and NaT propagation with
    ``skipna=False``.
    """
    # Lowercase "h": the uppercase "H" unit alias is deprecated in newer
    # pandas releases, while "h" is accepted by old and new versions alike.
    tdi = pd.TimedeltaIndex(["0h", "4h", "NaT", "4h", "0h", "2h"])
    arr = tdi.array
    # Valid values 0, 4, 4, 0, 2 hours: mean 2, squared deviations sum to
    # 16, and with the default ddof=1 the variance is 16 / 4 = 4 hours**2.
    expected = pd.Timedelta(hours=2)

    for obj in (arr, tdi):
        result = obj.std(skipna=True)
        assert isinstance(result, pd.Timedelta)
        assert result == expected

    for obj in (arr, tdi):
        # Without skipping, the single NaT makes the result NaT.
        result = obj.std(skipna=False)
        assert result is pd.NaT
240+
241+
def test_median(self):
    """
    Check ``median`` on a TimedeltaArray and on the TimedeltaIndex wrapping it.

    Covers the default NaT-skipping result and NaT propagation with
    ``skipna=False``.
    """
    # Lowercase "h": the uppercase "H" unit alias is deprecated in newer
    # pandas releases, while "h" is accepted by old and new versions alike.
    tdi = pd.TimedeltaIndex(["0h", "3h", "NaT", "5h06m", "0h", "2h"])
    arr = tdi.array
    # Valid values sorted: 0h, 0h, 2h, 3h, 5h06m -> the middle one is 2h.
    expected = pd.Timedelta(hours=2)

    for obj in (arr, tdi):
        result = obj.median(skipna=True)
        assert isinstance(result, pd.Timedelta)
        assert result == expected

    for obj in (arr, tdi):
        # These checks previously called ``std`` (copy-paste slip), which
        # left median's NaT propagation untested.
        result = obj.median(skipna=False)
        assert result is pd.NaT

0 commit comments

Comments
 (0)