From 4cb07d694ea0cdb312fb8c9335d3371673e28d36 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 21 Dec 2017 06:55:36 -0600 Subject: [PATCH] BUG: Fixed timedelta numeric operations Closes https://github.com/pandas-dev/pandas/issues/18880 --- doc/source/whatsnew/v0.22.0.txt | 3 ++- pandas/core/nanops.py | 10 ++++++-- pandas/tests/series/test_analytics.py | 33 +++++++++++++++++++++++++++ pandas/tests/test_nanops.py | 13 +++++++++++ 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 0579a80aad28e..d5d7b85d669e6 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -338,7 +338,8 @@ Reshaping Numeric ^^^^^^^ -- +- Fixed ``std`` and ``var`` computations for timedelta arrays not returning results in timedelta units (:issue:`18880`) +- Fixed ``skipna`` handling for some operations like ``sum`` on timedelta arrays (:issue:`18880`) - - diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e1c09947ac0b4..61e94f0e7190b 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -239,6 +239,9 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, elif copy: values = values.copy() + if is_timedelta64_dtype(values) and not skipna: + values = values.astype('float64') + values[mask] = np.nan values = _view_if_needed(values) # return a platform independent precision dtype @@ -406,7 +409,10 @@ def _get_counts_nanvar(mask, axis, ddof, dtype=float): @disallow('M8') @bottleneck_switch(ddof=1) def nanstd(values, axis=None, skipna=True, ddof=1): - result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof)) + var_ = nanvar(values, axis=axis, skipna=skipna, ddof=ddof) + if is_timedelta64_dtype(values): + var_ = var_.value + result = np.sqrt(var_) return _wrap_results(result, values.dtype) @@ -448,7 +454,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): # precision as the original values array. if is_float_dtype(dtype): result = result.astype(dtype) - return _wrap_results(result, values.dtype) + return _wrap_results(result, dtype) @disallow('M8', 'm8') diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 203a0b4a54858..1aec3f1664416 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1215,6 +1215,39 @@ def test_timedelta64_analytics(self): expected = Timedelta('1 days') assert result == expected + def test_timedelta64_sum(self): + # https://github.com/pandas-dev/pandas/issues/18880 + s = pd.Series(pd.timedelta_range(0, periods=12, freq='S')) + s[0] = np.nan + + result = s.sum(skipna=False) + assert result is pd.NaT + + result = s.sum() + assert result == pd.Timedelta(minutes=1, seconds=6) + + @pytest.mark.parametrize('method', [ + 'sum', 'mean', 'min', 'max', 'median', + 'std', 'var', + ]) + def test_timedelta64_many(self, method): + s_float = pd.Series(np.arange(12) * 1e3) + s_timed = pd.Series(pd.timedelta_range(0, periods=12, freq='us')) + + expected = pd.Timedelta(getattr(s_float, method)()) + result = getattr(s_timed, method)() + if pd.isna(expected): + assert pd.isna(result) + else: + assert result == expected + + s_float[0] = np.nan + s_timed[0] = np.nan + result = getattr(s_timed, method)(skipna=False) + expected = getattr(s_float, method)(skipna=False) + assert pd.isna(result) + assert pd.isna(expected) + def test_idxmin(self): # test idxmin # _check_stat_op approach can not be used here because of isna check. diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 9305504f8d5e3..e0006a9fec9cf 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -888,6 +888,19 @@ def test_nanstd_roundoff(self): result = data.std(ddof=ddof) assert result == 0.0 + def test_nanvar_timedelta(self): + result = pd.Series(dtype='m8[ns]').var() + assert result is pd.NaT + + result = pd.Series([1, 1], dtype='m8[ns]').var() + assert result == pd.Timedelta(0) + + result = pd.Series([10, 20], dtype='m8[ns]').var() + assert result == pd.Timedelta(50) + + result = pd.Series([np.nan, 10, 20, np.nan], dtype='m8[ns]').var() + assert result == pd.Timedelta(50) + @property def prng(self): return np.random.RandomState(1234)