# Patch summary (derived only from the hunks below).
#
# pandas/core/arrays/datetimelike.py
#   * `mean` gains an `axis` parameter (default 0).
#   * The explicit NaT masking and the empty-input short-circuit are removed;
#     the method now makes one `nanops.nanmean` call on `self._ndarray`,
#     passing `axis`, `skipna` and `mask=self.isna()`.
#   * The result goes through `_box_func` when `axis is None` or the array is
#     1-D, and through `_from_backing_data` otherwise.
#   NOTE(review): the new `axis` docstring entry has no description line.
#
# pandas/core/nanops.py
#   * `_wrap_results`: in the branch shown, a `Timestamp` is built only when
#     `tz is not None`; otherwise the result becomes `np.datetime64("NaT", "ns")`
#     when it is NA, or an `np.int64` viewed as `datetime64[ns]`.
#   * `_na_for_min_count`: returns `fill_value` when `axis is None`; the
#     `assert axis is not None` line is dropped.
#
# pandas/tests/arrays/test_datetimes.py
#   * Imports `NaT`; adds `test_mean`, `test_mean_2d`, `test_mean_empty`.
#
# pandas/tests/arrays/test_timedeltas.py
#   * Adds "mean" to the parametrized `test_reductions_empty`; adds
#     `test_mean` and `test_mean_2d`.
#   NOTE(review): `test_mean` labels its expected value "manually verified",
#   but the value is computed from `arr.dropna()._ndarray.mean()` - confirm
#   the comment is intended.
#
# NOTE(review): indentation and blank context lines were reconstructed to
# match the hunk header line counts - confirm against the target tree before
# applying.
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 4523ea1030ef1..8e3b26503a61b 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1304,7 +1304,7 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
         # Don't have to worry about NA `result`, since no NA went in.
         return self._box_func(result)
 
-    def mean(self, skipna=True):
+    def mean(self, skipna=True, axis: Optional[int] = 0):
         """
         Return the mean value of the Array.
 
@@ -1314,6 +1314,7 @@ def mean(self, skipna=True):
         ----------
         skipna : bool, default True
             Whether to ignore any NaT elements.
+        axis : int, optional, default 0
 
         Returns
         -------
@@ -1337,21 +1338,12 @@ def mean(self, skipna=True):
                 "obj.to_timestamp(how='start').mean()"
             )
 
-        mask = self.isna()
-        if skipna:
-            values = self[~mask]
-        elif mask.any():
-            return NaT
-        else:
-            values = self
-
-        if not len(values):
-            # short-circuit for empty max / min
-            return NaT
-
-        result = nanops.nanmean(values.view("i8"), skipna=skipna)
-        # Don't have to worry about NA `result`, since no NA went in.
-        return self._box_func(result)
+        result = nanops.nanmean(
+            self._ndarray, axis=axis, skipna=skipna, mask=self.isna()
+        )
+        if axis is None or self.ndim == 1:
+            return self._box_func(result)
+        return self._from_backing_data(result)
 
     def median(self, axis: Optional[int] = None, skipna: bool = True, *args, **kwargs):
         nv.validate_median(args, kwargs)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index c7b6e132f9a74..d2f596a5a6800 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -339,7 +339,12 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None):
             assert not isna(fill_value), "Expected non-null fill_value"
             if result == fill_value:
                 result = np.nan
-            result = Timestamp(result, tz=tz)
+            if tz is not None:
+                result = Timestamp(result, tz=tz)
+            elif isna(result):
+                result = np.datetime64("NaT", "ns")
+            else:
+                result = np.int64(result).view("datetime64[ns]")
         else:
             # If we have float dtype, taking a view will give the wrong result
             result = result.astype(dtype)
@@ -386,8 +391,9 @@ def _na_for_min_count(
 
     if values.ndim == 1:
         return fill_value
+    elif axis is None:
+        return fill_value
     else:
-        assert axis is not None  # assertion to make mypy happy
         result_shape = values.shape[:axis] + values.shape[axis + 1 :]
         result = np.full(result_shape, fill_value, dtype=values.dtype)
 
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 78721fc2fe1c1..9245eda2a71fe 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -9,6 +9,7 @@
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
 import pandas as pd
+from pandas import NaT
 import pandas._testing as tm
 from pandas.core.arrays import DatetimeArray
 from pandas.core.arrays.datetimes import sequence_to_dt64ns
@@ -566,3 +567,54 @@ def test_median_2d(self, arr1d):
         result = arr.median(axis=1, skipna=False)
         expected = type(arr)._from_sequence([pd.NaT], dtype=arr.dtype)
         tm.assert_equal(result, expected)
+
+    def test_mean(self, arr1d):
+        arr = arr1d
+
+        # manually verified result
+        expected = arr[0] + 0.4 * pd.Timedelta(days=1)
+
+        result = arr.mean()
+        assert result == expected
+        result = arr.mean(skipna=False)
+        assert result is pd.NaT
+
+        result = arr.dropna().mean(skipna=False)
+        assert result == expected
+
+        result = arr.mean(axis=0)
+        assert result == expected
+
+    def test_mean_2d(self):
+        dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
+        dta = dti._data.reshape(3, 2)
+
+        result = dta.mean(axis=0)
+        expected = dta[1]
+        tm.assert_datetime_array_equal(result, expected)
+
+        result = dta.mean(axis=1)
+        expected = dta[:, 0] + pd.Timedelta(hours=12)
+        tm.assert_datetime_array_equal(result, expected)
+
+        result = dta.mean(axis=None)
+        expected = dti.mean()
+        assert result == expected
+
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_mean_empty(self, arr1d, skipna):
+        arr = arr1d[:0]
+
+        assert arr.mean(skipna=skipna) is NaT
+
+        arr2d = arr.reshape(0, 3)
+        result = arr2d.mean(axis=0, skipna=skipna)
+        expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype)
+        tm.assert_datetime_array_equal(result, expected)
+
+        result = arr2d.mean(axis=1, skipna=skipna)
+        expected = arr  # i.e. 1D, empty
+        tm.assert_datetime_array_equal(result, expected)
+
+        result = arr2d.mean(axis=None, skipna=skipna)
+        assert result is NaT
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
index a5a74b16ed1cd..95265a958c35d 100644
--- a/pandas/tests/arrays/test_timedeltas.py
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -177,7 +177,7 @@ def test_neg_freq(self):
 
 
 class TestReductions:
-    @pytest.mark.parametrize("name", ["std", "min", "max", "median"])
+    @pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"])
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reductions_empty(self, name, skipna):
         tdi = pd.TimedeltaIndex([])
@@ -334,3 +334,37 @@ def test_median(self):
 
         result = tdi.median(skipna=False)
         assert result is pd.NaT
+
+    def test_mean(self):
+        tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])
+        arr = tdi._data
+
+        # manually verified result
+        expected = pd.Timedelta(arr.dropna()._ndarray.mean())
+
+        result = arr.mean()
+        assert result == expected
+        result = arr.mean(skipna=False)
+        assert result is pd.NaT
+
+        result = arr.dropna().mean(skipna=False)
+        assert result == expected
+
+        result = arr.mean(axis=0)
+        assert result == expected
+
+    def test_mean_2d(self):
+        tdi = pd.timedelta_range("14 days", periods=6)
+        tda = tdi._data.reshape(3, 2)
+
+        result = tda.mean(axis=0)
+        expected = tda[1]
+        tm.assert_timedelta_array_equal(result, expected)
+
+        result = tda.mean(axis=1)
+        expected = tda[:, 0] + pd.Timedelta(hours=12)
+        tm.assert_timedelta_array_equal(result, expected)
+
+        result = tda.mean(axis=None)
+        expected = tdi.mean()
+        assert result == expected