Skip to content

Commit 927c83c

Browse files
authored
ENH: std for dt64 dtype (#37436)
1 parent 699c0e6 commit 927c83c

File tree

6 files changed

+48
-9
lines changed

6 files changed

+48
-9
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ Other enhancements
225225
- :class:`DataFrame` now supports ``divmod`` operation (:issue:`37165`)
226226
- :meth:`DataFrame.to_parquet` now returns a ``bytes`` object when no ``path`` argument is passed (:issue:`37105`)
227227
- :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`)
228+
- :class:`DatetimeIndex` and :class:`Series` with ``datetime64`` or ``datetime64tz`` dtypes now support ``std`` (:issue:`37436`)
228229

229230
.. _whatsnew_120.api_breaking.python:
230231

pandas/core/arrays/datetimes.py

+22
Original file line numberDiff line numberDiff line change
@@ -1862,6 +1862,28 @@ def to_julian_date(self):
18621862
/ 24.0
18631863
)
18641864

1865+
# -----------------------------------------------------------------
1866+
# Reductions
1867+
1868+
def std(
1869+
self,
1870+
axis=None,
1871+
dtype=None,
1872+
out=None,
1873+
ddof: int = 1,
1874+
keepdims: bool = False,
1875+
skipna: bool = True,
1876+
):
1877+
# Because std is translation-invariant, we can get self.std
1878+
# by calculating (self - Timestamp(0)).std, and we can do it
1879+
# without creating a copy by using a view on self._ndarray
1880+
from pandas.core.arrays import TimedeltaArray
1881+
1882+
tda = TimedeltaArray(self._ndarray.view("i8"))
1883+
return tda.std(
1884+
axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna
1885+
)
1886+
18651887

18661888
# -------------------------------------------------------------------
18671889
# Constructor Helpers

pandas/core/indexes/datetimes.py

+2
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def _new_DatetimeIndex(cls, d):
9999
"date",
100100
"time",
101101
"timetz",
102+
"std",
102103
]
103104
+ DatetimeArray._bool_ops,
104105
DatetimeArray,
@@ -201,6 +202,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
201202
month_name
202203
day_name
203204
mean
205+
std
204206
205207
See Also
206208
--------

pandas/core/nanops.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,6 @@ def _get_counts_nanvar(
756756
return count, d
757757

758758

759-
@disallow("M8")
760759
@bottleneck_switch(ddof=1)
761760
def nanstd(values, axis=None, skipna=True, ddof=1, mask=None):
762761
"""
@@ -786,6 +785,9 @@ def nanstd(values, axis=None, skipna=True, ddof=1, mask=None):
786785
>>> nanops.nanstd(s)
787786
1.0
788787
"""
788+
if values.dtype == "M8[ns]":
789+
values = values.view("m8[ns]")
790+
789791
orig_dtype = values.dtype
790792
values, mask, _, _, _ = _get_values(values, skipna, mask=mask)
791793

pandas/tests/arrays/test_timedeltas.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,18 @@ def test_sum_2d_skipna_false(self):
290290
)._values
291291
tm.assert_timedelta_array_equal(result, expected)
292292

293-
def test_std(self):
294-
tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"])
293+
# Adding a Timestamp makes this a test for DatetimeArray.std
294+
@pytest.mark.parametrize(
295+
"add",
296+
[
297+
pd.Timedelta(0),
298+
pd.Timestamp.now(),
299+
pd.Timestamp.now("UTC"),
300+
pd.Timestamp.now("Asia/Tokyo"),
301+
],
302+
)
303+
def test_std(self, add):
304+
tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add
295305
arr = tdi.array
296306

297307
result = arr.std(skipna=True)
@@ -303,18 +313,20 @@ def test_std(self):
303313
assert isinstance(result, pd.Timedelta)
304314
assert result == expected
305315

306-
result = nanops.nanstd(np.asarray(arr), skipna=True)
307-
assert isinstance(result, pd.Timedelta)
308-
assert result == expected
316+
if getattr(arr, "tz", None) is None:
317+
result = nanops.nanstd(np.asarray(arr), skipna=True)
318+
assert isinstance(result, pd.Timedelta)
319+
assert result == expected
309320

310321
result = arr.std(skipna=False)
311322
assert result is pd.NaT
312323

313324
result = tdi.std(skipna=False)
314325
assert result is pd.NaT
315326

316-
result = nanops.nanstd(np.asarray(arr), skipna=False)
317-
assert result is pd.NaT
327+
if getattr(arr, "tz", None) is None:
328+
result = nanops.nanstd(np.asarray(arr), skipna=False)
329+
assert result is pd.NaT
318330

319331
def test_median(self):
320332
tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])

pandas/tests/reductions/test_stat_reductions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def _check_stat_op(
9696
string_series_[5:15] = np.NaN
9797

9898
# mean, median, std, idxmax, idxmin, min, and max are valid for dates
99-
if name not in ["max", "min", "mean", "median"]:
99+
if name not in ["max", "min", "mean", "median", "std"]:
100100
ds = Series(pd.date_range("1/1/2001", periods=10))
101101
with pytest.raises(TypeError):
102102
f(ds)

0 commit comments

Comments
 (0)