Skip to content

Commit 72a051c

Browse files
committed
Merge pull request #8476 from jreback/td_std
BUG: allow std to work with timedeltas (GH8471)
2 parents d22b382 + d27e37a commit 72a051c

File tree

5 files changed

+79
-70
lines changed

5 files changed

+79
-70
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ TimedeltaIndex/Scalar
638638
We introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner,
639639
but allows compatibility with ``np.timedelta64`` types as well as a host of custom representations, parsing options, and attributes.
640640
This type is very similar to how ``Timestamp`` works for ``datetimes``. It is a nice-API box for the type. See the :ref:`docs <timedeltas.timedeltas>`.
641-
(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`)
641+
(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`, :issue:`8471`)
642642

643643
.. warning::
644644

pandas/core/generic.py

+34-52
Original file line numberDiff line numberDiff line change
@@ -3950,60 +3950,42 @@ def mad(self, axis=None, skipna=None, level=None, **kwargs):
39503950
return np.abs(demeaned).mean(axis=axis, skipna=skipna)
39513951
cls.mad = mad
39523952

3953-
@Substitution(outname='variance',
3954-
desc="Return unbiased variance over requested "
3955-
"axis.\n\nNormalized by N-1 by default. "
3956-
"This can be changed using the ddof argument")
3957-
@Appender(_num_doc)
3958-
def var(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
3959-
if skipna is None:
3960-
skipna = True
3961-
if axis is None:
3962-
axis = self._stat_axis_number
3963-
if level is not None:
3964-
return self._agg_by_level('var', axis=axis, level=level,
3965-
skipna=skipna, ddof=ddof)
3953+
def _make_stat_function_ddof(name, desc, f):
39663954

3967-
return self._reduce(nanops.nanvar, axis=axis, skipna=skipna,
3968-
ddof=ddof)
3969-
cls.var = var
3970-
3971-
@Substitution(outname='stdev',
3972-
desc="Return unbiased standard deviation over requested "
3973-
"axis.\n\nNormalized by N-1 by default. "
3974-
"This can be changed using the ddof argument")
3975-
@Appender(_num_doc)
3976-
def std(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
3977-
if skipna is None:
3978-
skipna = True
3979-
if axis is None:
3980-
axis = self._stat_axis_number
3981-
if level is not None:
3982-
return self._agg_by_level('std', axis=axis, level=level,
3983-
skipna=skipna, ddof=ddof)
3984-
result = self.var(axis=axis, skipna=skipna, ddof=ddof)
3985-
if getattr(result, 'ndim', 0) > 0:
3986-
return result.apply(np.sqrt)
3987-
return np.sqrt(result)
3988-
cls.std = std
3989-
3990-
@Substitution(outname='standarderror',
3991-
desc="Return unbiased standard error of the mean over "
3992-
"requested axis.\n\nNormalized by N-1 by default. "
3993-
"This can be changed using the ddof argument")
3994-
@Appender(_num_doc)
3995-
def sem(self, axis=None, skipna=None, level=None, ddof=1, **kwargs):
3996-
if skipna is None:
3997-
skipna = True
3998-
if axis is None:
3999-
axis = self._stat_axis_number
4000-
if level is not None:
4001-
return self._agg_by_level('sem', axis=axis, level=level,
4002-
skipna=skipna, ddof=ddof)
3955+
@Substitution(outname=name, desc=desc)
3956+
@Appender(_num_doc)
3957+
def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
3958+
**kwargs):
3959+
if skipna is None:
3960+
skipna = True
3961+
if axis is None:
3962+
axis = self._stat_axis_number
3963+
if level is not None:
3964+
return self._agg_by_level(name, axis=axis, level=level,
3965+
skipna=skipna, ddof=ddof)
3966+
return self._reduce(f, axis=axis,
3967+
skipna=skipna, ddof=ddof)
3968+
stat_func.__name__ = name
3969+
return stat_func
40033970

4004-
return self._reduce(nanops.nansem, axis=axis, skipna=skipna,
4005-
ddof=ddof)
4006-
cls.sem = sem
3971+
cls.sem = _make_stat_function_ddof(
3972+
'sem',
3973+
"Return unbiased standard error of the mean over "
3974+
"requested axis.\n\nNormalized by N-1 by default. "
3975+
"This can be changed using the ddof argument",
3976+
nanops.nansem)
3977+
cls.var = _make_stat_function_ddof(
3978+
'var',
3979+
"Return unbiased variance over requested "
3980+
"axis.\n\nNormalized by N-1 by default. "
3981+
"This can be changed using the ddof argument",
3982+
nanops.nanvar)
3983+
cls.std = _make_stat_function_ddof(
3984+
'std',
3985+
"Return unbiased standard deviation over requested "
3986+
"axis.\n\nNormalized by N-1 by default. "
3987+
"This can be changed using the ddof argument",
3988+
nanops.nanstd)
40073989

40083990
@Substitution(outname='compounded',
40093991
desc="Return the compound percentage of the values for "

pandas/core/nanops.py

+23-13
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ def _wrap_results(result, dtype):
228228
if not isinstance(result, np.ndarray):
229229
result = lib.Timedelta(result)
230230
else:
231-
result = result.view(dtype)
231+
result = result.astype('i8').view(dtype)
232232

233233
return result
234234

@@ -295,7 +295,7 @@ def get_median(x):
295295
if values.ndim > 1:
296296
# there's a non-empty array to apply over otherwise numpy raises
297297
if notempty:
298-
return np.apply_along_axis(get_median, axis, values)
298+
return _wrap_results(np.apply_along_axis(get_median, axis, values), dtype)
299299

300300
# must return the correct shape, but median is not defined for the
301301
# empty set so return nans of shape "everything but the passed axis"
@@ -305,7 +305,7 @@ def get_median(x):
305305
dims = np.arange(values.ndim)
306306
ret = np.empty(shp[dims != axis])
307307
ret.fill(np.nan)
308-
return ret
308+
return _wrap_results(ret, dtype)
309309

310310
# otherwise return a scalar value
311311
return _wrap_results(get_median(values) if notempty else np.nan, dtype)
@@ -329,15 +329,8 @@ def _get_counts_nanvar(mask, axis, ddof):
329329
return count, d
330330

331331

332-
@disallow('M8','m8')
333-
@bottleneck_switch(ddof=1)
334-
def nanvar(values, axis=None, skipna=True, ddof=1):
335-
336-
# we are going to allow timedelta64[ns] here
337-
# but NOT going to coerce them to the Timedelta type
338-
# as this could cause overflow
339-
# so var cannot be computed (but std can!)
340-
332+
def _nanvar(values, axis=None, skipna=True, ddof=1):
333+
# private variance calculator shared by nanvar and nanstd (no dtype checks, no result wrapping)
341334
mask = isnull(values)
342335
if not _is_floating_dtype(values):
343336
values = values.astype('f8')
@@ -352,6 +345,23 @@ def nanvar(values, axis=None, skipna=True, ddof=1):
352345
XX = _ensure_numeric((values ** 2).sum(axis))
353346
return np.fabs((XX - X ** 2 / count) / d)
354347

348+
@disallow('M8')
349+
@bottleneck_switch(ddof=1)
350+
def nanstd(values, axis=None, skipna=True, ddof=1):
351+
352+
result = np.sqrt(_nanvar(values, axis=axis, skipna=skipna, ddof=ddof))
353+
return _wrap_results(result, values.dtype)
354+
355+
@disallow('M8','m8')
356+
@bottleneck_switch(ddof=1)
357+
def nanvar(values, axis=None, skipna=True, ddof=1):
358+
359+
# timedelta64[ns] input is rejected here (see the @disallow decorator):
360+
# the variance is NOT coerced to the Timedelta type
361+
# as this could cause overflow
362+
# so var cannot be computed for timedeltas (but std can, via nanstd)
363+
return _nanvar(values, axis=axis, skipna=skipna, ddof=ddof)
364+
355365
@disallow('M8','m8')
356366
def nansem(values, axis=None, skipna=True, ddof=1):
357367
var = nanvar(values, axis, skipna, ddof=ddof)
@@ -517,7 +527,7 @@ def nankurt(values, axis=None, skipna=True):
517527
return result
518528

519529

520-
@disallow('M8')
530+
@disallow('M8','m8')
521531
def nanprod(values, axis=None, skipna=True):
522532
mask = isnull(values)
523533
if skipna and not _is_any_int_dtype(values):

pandas/tests/test_nanops.py

+4
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,10 @@ def test_nanvar(self):
332332
self.check_funs_ddof(nanops.nanvar, np.var,
333333
allow_complex=False, allow_date=False, allow_tdelta=False)
334334

335+
def test_nanstd(self):
336+
self.check_funs_ddof(nanops.nanstd, np.std,
337+
allow_complex=False, allow_date=False, allow_tdelta=True)
338+
335339
def test_nansem(self):
336340
tm.skip_if_no_package('scipy.stats')
337341
self.check_funs_ddof(nanops.nansem, np.var,

pandas/tseries/tests/test_timedeltas.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,9 @@ def test_timedelta_ops(self):
479479
expected = to_timedelta(timedelta(seconds=9))
480480
self.assertEqual(result, expected)
481481

482+
result = td.to_frame().mean()
483+
self.assertEqual(result[0], expected)
484+
482485
result = td.quantile(.1)
483486
expected = Timedelta(np.timedelta64(2600,'ms'))
484487
self.assertEqual(result, expected)
@@ -487,18 +490,28 @@ def test_timedelta_ops(self):
487490
expected = to_timedelta('00:00:08')
488491
self.assertEqual(result, expected)
489492

493+
result = td.to_frame().median()
494+
self.assertEqual(result[0], expected)
495+
490496
# GH 6462
491497
# consistency in returned values for sum
492498
result = td.sum()
493499
expected = to_timedelta('00:01:21')
494500
self.assertEqual(result, expected)
495501

496-
# you can technically do a std, but var overflows
497-
# so this is tricky
498-
self.assertRaises(TypeError, lambda : td.std())
502+
result = td.to_frame().sum()
503+
self.assertEqual(result[0], expected)
504+
505+
# std
506+
result = td.std()
507+
expected = to_timedelta(Series(td.dropna().values).std())
508+
self.assertEqual(result, expected)
509+
510+
result = td.to_frame().std()
511+
self.assertEqual(result[0], expected)
499512

500513
# invalid ops
501-
for op in ['skew','kurt','sem','var']:
514+
for op in ['skew','kurt','sem','var','prod']:
502515
self.assertRaises(TypeError, lambda : getattr(td,op)())
503516

504517
def test_timedelta_ops_scalar(self):

0 commit comments

Comments
 (0)