diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 812af544ed9d8..83614d7a9628b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -225,6 +225,7 @@ Other enhancements - :class:`DataFrame` now supports ``divmod`` operation (:issue:`37165`) - :meth:`DataFrame.to_parquet` now returns a ``bytes`` object when no ``path`` argument is passed (:issue:`37105`) - :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`) +- :class:`DatetimeIndex` and :class:`Series` with ``datetime64`` or ``datetime64tz`` dtypes now support ``std`` (:issue:`37436`) .. _whatsnew_120.api_breaking.python: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 751290d5af9ae..a1050f4271e05 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1862,6 +1862,28 @@ def to_julian_date(self): / 24.0 ) + # ----------------------------------------------------------------- + # Reductions + + def std( + self, + axis=None, + dtype=None, + out=None, + ddof: int = 1, + keepdims: bool = False, + skipna: bool = True, + ): + # Because std is translation-invariant, we can get self.std + # by calculating (self - Timestamp(0)).std, and we can do it + # without creating a copy by using a view on self._ndarray + from pandas.core.arrays import TimedeltaArray + + tda = TimedeltaArray(self._ndarray.view("i8")) + return tda.std( + axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna + ) + # ------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 005272750997e..aa16dc9752565 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -99,6 +99,7 @@ def _new_DatetimeIndex(cls, d): "date", "time", "timetz", + "std", ] + DatetimeArray._bool_ops, DatetimeArray, @@ -201,6 +202,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): month_name day_name mean + std See Also -------- diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 3ab08ab2cda56..46ff4a0e2f612 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -756,7 +756,6 @@ def _get_counts_nanvar( return count, d -@disallow("M8") @bottleneck_switch(ddof=1) def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): """ @@ -786,6 +785,9 @@ def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): >>> nanops.nanstd(s) 1.0 """ + if values.dtype == "M8[ns]": + values = values.view("m8[ns]") + orig_dtype = values.dtype values, mask, _, _, _ = _get_values(values, skipna, mask=mask) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 95265a958c35d..47ebfe311d9ea 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -290,8 +290,18 @@ def test_sum_2d_skipna_false(self): )._values tm.assert_timedelta_array_equal(result, expected) - def test_std(self): - tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + # Adding a Timestamp makes this a test for DatetimeArray.std + @pytest.mark.parametrize( + "add", + [ + pd.Timedelta(0), + pd.Timestamp.now(), + pd.Timestamp.now("UTC"), + pd.Timestamp.now("Asia/Tokyo"), + ], + ) + def test_std(self, add): + tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add arr = tdi.array result = arr.std(skipna=True) @@ -303,9 +313,10 @@ def test_std(self): assert isinstance(result, pd.Timedelta) assert result == expected - result = nanops.nanstd(np.asarray(arr), skipna=True) - assert isinstance(result, pd.Timedelta) - assert result == expected + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected result = arr.std(skipna=False) assert result is pd.NaT @@ -313,8 +324,9 @@ def test_std(self): result = tdi.std(skipna=False) assert result is pd.NaT - result = nanops.nanstd(np.asarray(arr), skipna=False) - assert result is pd.NaT + if getattr(arr, "tz", None) is None: + result = nanops.nanstd(np.asarray(arr), skipna=False) + assert result is pd.NaT def test_median(self): tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index fd2746672a0eb..67e871f8b67c2 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -96,7 +96,7 @@ def _check_stat_op( string_series_[5:15] = np.NaN # mean, idxmax, idxmin, min, and max are valid for dates - if name not in ["max", "min", "mean", "median"]: + if name not in ["max", "min", "mean", "median", "std"]: ds = Series(pd.date_range("1/1/2001", periods=10)) with pytest.raises(TypeError): f(ds)