Skip to content

Commit efc7f2f

Browse files
jbrockmendel authored and jreback committed
implement+test mean for datetimelike EA/Index/Series (#24757)
1 parent 959e799 commit efc7f2f

File tree

9 files changed

+181
-1
lines changed

9 files changed

+181
-1
lines changed

doc/source/reference/indexing.rst

+14
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,13 @@ Conversion
403403
DatetimeIndex.to_series
404404
DatetimeIndex.to_frame
405405

406+
Methods
407+
~~~~~~~
408+
.. autosummary::
409+
:toctree: api/
410+
411+
DatetimeIndex.mean
412+
406413
TimedeltaIndex
407414
--------------
408415
.. autosummary::
@@ -435,6 +442,13 @@ Conversion
435442
TimedeltaIndex.ceil
436443
TimedeltaIndex.to_frame
437444

445+
Methods
446+
~~~~~~~
447+
.. autosummary::
448+
:toctree: api/
449+
450+
TimedeltaIndex.mean
451+
438452
.. currentmodule:: pandas
439453

440454
PeriodIndex

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ Other Enhancements
9696
- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
9797
- :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`)
9898
- :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
99+
- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)
99100
-
100101

101102
.. _whatsnew_0250.api_breaking:

pandas/core/arrays/datetimelike.py

+49-1
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ def _ensure_localized(self, arg, ambiguous='raise', nonexistent='raise',
13821382
def _reduce(self, name, axis=0, skipna=True, **kwargs):
13831383
op = getattr(self, name, None)
13841384
if op:
1385-
return op(axis=axis, skipna=skipna, **kwargs)
1385+
return op(skipna=skipna, **kwargs)
13861386
else:
13871387
return super()._reduce(name, skipna, **kwargs)
13881388

@@ -1438,6 +1438,54 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
14381438
# Don't have to worry about NA `result`, since no NA went in.
14391439
return self._box_func(result)
14401440

1441+
def mean(self, skipna=True):
1442+
"""
1443+
Return the mean value of the Array.
1444+
1445+
.. versionadded:: 0.25.0
1446+
1447+
Parameters
1448+
----------
1449+
skipna : bool, default True
1450+
Whether to ignore any NaT elements.
1451+
1452+
Returns
1453+
-------
1454+
scalar (Timestamp or Timedelta)
1455+
1456+
See Also
1457+
--------
1458+
numpy.ndarray.mean
1459+
Series.mean : Return the mean value in a Series.
1460+
1461+
Notes
1462+
-----
1463+
mean is only defined for Datetime and Timedelta dtypes, not for Period.
1464+
"""
1465+
if is_period_dtype(self):
1466+
# See discussion in GH#24757
1467+
raise TypeError(
1468+
"mean is not implemented for {cls} since the meaning is "
1469+
"ambiguous. An alternative is "
1470+
"obj.to_timestamp(how='start').mean()"
1471+
.format(cls=type(self).__name__))
1472+
1473+
mask = self.isna()
1474+
if skipna:
1475+
values = self[~mask]
1476+
elif mask.any():
1477+
return NaT
1478+
else:
1479+
values = self
1480+
1481+
if not len(values):
1482+
# short-circuit: nothing left to average (empty, or all-NaT with skipna)
1483+
return NaT
1484+
1485+
result = nanops.nanmean(values.view('i8'), skipna=skipna)
1486+
# Don't have to worry about NA `result`, since no NA went in.
1487+
return self._box_func(result)
1488+
14411489

14421490
# -------------------------------------------------------------------
14431491
# Shared Constructor Helpers

pandas/core/indexes/datetimelike.py

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin):
7373
_maybe_mask_results = ea_passthrough(
7474
DatetimeLikeArrayMixin._maybe_mask_results)
7575
__iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__)
76+
mean = ea_passthrough(DatetimeLikeArrayMixin.mean)
7677

7778
@property
7879
def freq(self):

pandas/core/indexes/datetimes.py

+1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index, DatetimeDelegateMixin):
203203
to_frame
204204
month_name
205205
day_name
206+
mean
206207
207208
See Also
208209
--------

pandas/core/indexes/timedeltas.py

+1
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ class TimedeltaIndex(DatetimeIndexOpsMixin, dtl.TimelikeOps, Int64Index,
129129
floor
130130
ceil
131131
to_frame
132+
mean
132133
133134
See Also
134135
--------

pandas/core/series.py

+4
Original file line numberDiff line numberDiff line change
@@ -3729,6 +3729,10 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
37293729
elif is_datetime64_dtype(delegate):
37303730
# use DatetimeIndex implementation to handle skipna correctly
37313731
delegate = DatetimeIndex(delegate)
3732+
elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name):
3733+
# use TimedeltaIndex to handle skipna correctly
3734+
# TODO: remove hasattr check after TimedeltaIndex has `std` method
3735+
delegate = TimedeltaIndex(delegate)
37323736

37333737
# dispatch to numpy arrays
37343738
elif isinstance(delegate, np.ndarray):

pandas/tests/frame/test_analytics.py

+41
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,47 @@ def test_mean_corner(self, float_frame, float_string_frame):
12051205
means = float_frame.mean(0)
12061206
assert means['bool'] == float_frame['bool'].values.mean()
12071207

1208+
def test_mean_datetimelike(self):
1209+
# GH#24757 check that datetime64 and period columns are excluded by
1210+
# default (timedelta64 is kept); numeric_only=True keeps only numeric columns
1211+
1212+
df = pd.DataFrame({
1213+
'A': np.arange(3),
1214+
'B': pd.date_range('2016-01-01', periods=3),
1215+
'C': pd.timedelta_range('1D', periods=3),
1216+
'D': pd.period_range('2016', periods=3, freq='A')
1217+
})
1218+
result = df.mean(numeric_only=True)
1219+
expected = pd.Series({'A': 1.})
1220+
tm.assert_series_equal(result, expected)
1221+
1222+
result = df.mean()
1223+
expected = pd.Series({
1224+
'A': 1.,
1225+
'C': df.loc[1, 'C']
1226+
})
1227+
tm.assert_series_equal(result, expected)
1228+
1229+
@pytest.mark.xfail(reason="casts to object-dtype and then tries to "
1230+
"add timestamps",
1231+
raises=TypeError, strict=True)
1232+
def test_mean_datetimelike_numeric_only_false(self):
1233+
df = pd.DataFrame({
1234+
'A': np.arange(3),
1235+
'B': pd.date_range('2016-01-01', periods=3),
1236+
'C': pd.timedelta_range('1D', periods=3),
1237+
'D': pd.period_range('2016', periods=3, freq='A')
1238+
})
1239+
1240+
result = df.mean(numeric_only=False)
1241+
expected = pd.Series({
1242+
'A': 1,
1243+
'B': df.loc[1, 'B'],
1244+
'C': df.loc[1, 'C'],
1245+
'D': df.loc[1, 'D']
1246+
})
1247+
tm.assert_series_equal(result, expected)
1248+
12081249
def test_stats_mixed_type(self, float_string_frame):
12091250
# don't blow up
12101251
float_string_frame.std(1)

pandas/tests/reductions/test_stat_reductions.py

+69
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,78 @@
1010

1111
import pandas as pd
1212
from pandas import DataFrame, Series
13+
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
1314
import pandas.util.testing as tm
1415

1516

17+
class TestDatetimeLikeStatReductions:
18+
19+
@pytest.mark.parametrize('box', [Series, pd.Index, DatetimeArray])
20+
def test_dt64_mean(self, tz_naive_fixture, box):
21+
tz = tz_naive_fixture
22+
23+
dti = pd.date_range('2001-01-01', periods=11, tz=tz)
24+
# shuffle so that we are not just working with monotone-increasing values
25+
dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])
26+
dtarr = dti._data
27+
28+
obj = box(dtarr)
29+
assert obj.mean() == pd.Timestamp('2001-01-06', tz=tz)
30+
assert obj.mean(skipna=False) == pd.Timestamp('2001-01-06', tz=tz)
31+
32+
# dtarr[-2] is 2001-01-03 (position 2 of the original, unshuffled range)
33+
dtarr[-2] = pd.NaT
34+
35+
obj = box(dtarr)
36+
assert obj.mean() == pd.Timestamp('2001-01-06 07:12:00', tz=tz)
37+
assert obj.mean(skipna=False) is pd.NaT
38+
39+
@pytest.mark.parametrize('box', [Series, pd.Index, PeriodArray])
40+
def test_period_mean(self, box):
41+
# GH#24757
42+
dti = pd.date_range('2001-01-01', periods=11)
43+
# shuffle so that we are not just working with monotone-increasing values
44+
dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])
45+
46+
# use hourly frequency to avoid rounding errors in expected results
47+
# TODO: flesh this out with different frequencies
48+
parr = dti._data.to_period('H')
49+
obj = box(parr)
50+
with pytest.raises(TypeError, match="ambiguous"):
51+
obj.mean()
52+
with pytest.raises(TypeError, match="ambiguous"):
53+
obj.mean(skipna=True)
54+
55+
# parr[-2] is the period for 2001-01-03 (position 2 of the original range)
56+
parr[-2] = pd.NaT
57+
58+
with pytest.raises(TypeError, match="ambiguous"):
59+
obj.mean()
60+
with pytest.raises(TypeError, match="ambiguous"):
61+
obj.mean(skipna=True)
62+
63+
@pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray])
64+
def test_td64_mean(self, box):
65+
tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4],
66+
unit='D')
67+
68+
tdarr = tdi._data
69+
obj = box(tdarr)
70+
71+
result = obj.mean()
72+
expected = np.array(tdarr).mean()
73+
assert result == expected
74+
75+
tdarr[0] = pd.NaT
76+
assert obj.mean(skipna=False) is pd.NaT
77+
78+
result2 = obj.mean(skipna=True)
79+
assert result2 == tdi[1:].mean()
80+
81+
# exact equality fails by 1 nanosecond
82+
assert result2.round('us') == (result * 11. / 10).round('us')
83+
84+
1685
class TestSeriesStatReductions:
1786
# Note: the name TestSeriesStatReductions indicates these tests
1887
# were moved from a series-specific test file, _not_ that these tests are

0 commit comments

Comments
 (0)