Skip to content

REF: avoid special-casing inside DTA/TDA.mean, flesh out tests #37422

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 8 additions & 16 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1304,7 +1304,7 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
# Don't have to worry about NA `result`, since no NA went in.
return self._box_func(result)

def mean(self, skipna=True):
def mean(self, skipna=True, axis: Optional[int] = 0):
"""
Return the mean value of the Array.

Expand All @@ -1314,6 +1314,7 @@ def mean(self, skipna=True):
----------
skipna : bool, default True
Whether to ignore any NaT elements.
axis : int, optional, default 0

Returns
-------
Expand All @@ -1337,21 +1338,12 @@ def mean(self, skipna=True):
"obj.to_timestamp(how='start').mean()"
)

mask = self.isna()
if skipna:
values = self[~mask]
elif mask.any():
return NaT
else:
values = self

if not len(values):
# short-circuit for empty max / min
return NaT

result = nanops.nanmean(values.view("i8"), skipna=skipna)
# Don't have to worry about NA `result`, since no NA went in.
return self._box_func(result)
result = nanops.nanmean(
self._ndarray, axis=axis, skipna=skipna, mask=self.isna()
)
if axis is None or self.ndim == 1:
return self._box_func(result)
return self._from_backing_data(result)

def median(self, axis: Optional[int] = None, skipna: bool = True, *args, **kwargs):
nv.validate_median(args, kwargs)
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,12 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None):
assert not isna(fill_value), "Expected non-null fill_value"
if result == fill_value:
result = np.nan
result = Timestamp(result, tz=tz)
if tz is not None:
result = Timestamp(result, tz=tz)
elif isna(result):
result = np.datetime64("NaT", "ns")
else:
result = np.int64(result).view("datetime64[ns]")
else:
# If we have float dtype, taking a view will give the wrong result
result = result.astype(dtype)
Expand Down Expand Up @@ -386,8 +391,9 @@ def _na_for_min_count(

if values.ndim == 1:
return fill_value
elif axis is None:
return fill_value
else:
assert axis is not None # assertion to make mypy happy
result_shape = values.shape[:axis] + values.shape[axis + 1 :]

result = np.full(result_shape, fill_value, dtype=values.dtype)
Expand Down
52 changes: 52 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pandas.core.dtypes.dtypes import DatetimeTZDtype

import pandas as pd
from pandas import NaT
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays.datetimes import sequence_to_dt64ns
Expand Down Expand Up @@ -566,3 +567,54 @@ def test_median_2d(self, arr1d):
result = arr.median(axis=1, skipna=False)
expected = type(arr)._from_sequence([pd.NaT], dtype=arr.dtype)
tm.assert_equal(result, expected)

def test_mean(self, arr1d):
    """Check the scalar results of ``mean`` on the 1D array ``arr1d``.

    ``arr1d`` is supplied by the test harness; its contents are not
    defined in this block.
    """
    # Reference value for the supplied data: first element plus 0.4 days.
    target = arr1d[0] + 0.4 * pd.Timedelta(days=1)

    # Default arguments.
    assert arr1d.mean() == target

    # Without skipping missing values the result must be the NaT singleton
    # (presumably ``arr1d`` contains at least one NaT -- confirm against
    # the fixture definition).
    assert arr1d.mean(skipna=False) is pd.NaT

    # Once missing values are dropped up front, skipna=False gives the
    # same answer as the default call.
    assert arr1d.dropna().mean(skipna=False) == target

    # Passing axis=0 explicitly matches the default call.
    assert arr1d.mean(axis=0) == target

def test_mean_2d(self):
    """Check ``mean`` over each axis, and over all elements, of a 3x2 array."""
    index = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
    grid = index._data.reshape(3, 2)

    # Reducing over axis 0 is expected to reproduce the middle row.
    col_means = grid.mean(axis=0)
    tm.assert_datetime_array_equal(col_means, grid[1])

    # Reducing over axis 1 is expected to give the first column shifted
    # forward by twelve hours.
    row_means = grid.mean(axis=1)
    tm.assert_datetime_array_equal(row_means, grid[:, 0] + pd.Timedelta(hours=12))

    # axis=None reduces over every element and must agree with the mean
    # of the original 1D index.
    overall = grid.mean(axis=None)
    assert overall == index.mean()

@pytest.mark.parametrize("skipna", [True, False])
def test_mean_empty(self, arr1d, skipna):
    """Check ``mean`` on zero-length 1D and 2D arrays for both ``skipna`` values."""
    empty_1d = arr1d[:0]

    # A zero-length 1D array reduces to the NaT singleton.
    assert empty_1d.mean(skipna=skipna) is NaT

    empty_2d = empty_1d.reshape(0, 3)

    # Shape (0, 3) reduced over axis 0: one NaT per column.
    col_means = empty_2d.mean(axis=0, skipna=skipna)
    all_nat = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=empty_1d.dtype)
    tm.assert_datetime_array_equal(col_means, all_nat)

    # Shape (0, 3) reduced over axis 1: an empty 1D array, i.e. the same
    # as the zero-length slice taken above.
    row_means = empty_2d.mean(axis=1, skipna=skipna)
    tm.assert_datetime_array_equal(row_means, empty_1d)

    # Reducing over every element again yields the NaT singleton.
    assert empty_2d.mean(axis=None, skipna=skipna) is NaT
36 changes: 35 additions & 1 deletion pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def test_neg_freq(self):


class TestReductions:
@pytest.mark.parametrize("name", ["std", "min", "max", "median"])
@pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"])
@pytest.mark.parametrize("skipna", [True, False])
def test_reductions_empty(self, name, skipna):
tdi = pd.TimedeltaIndex([])
Expand Down Expand Up @@ -334,3 +334,37 @@ def test_median(self):

result = tdi.median(skipna=False)
assert result is pd.NaT

def test_mean(self):
    """Check the scalar results of ``mean`` on a 1D timedelta array with one NaT."""
    index = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])
    values = index._data

    # Reference value: the mean of the backing ndarray once the missing
    # entry has been dropped, wrapped as a Timedelta.
    target = pd.Timedelta(values.dropna()._ndarray.mean())

    # Default arguments.
    assert values.mean() == target

    # With the NaT entry present and skipna=False the result must be the
    # NaT singleton.
    assert values.mean(skipna=False) is pd.NaT

    # Once the missing entry is dropped up front, skipna=False gives the
    # same answer as the default call.
    assert values.dropna().mean(skipna=False) == target

    # Passing axis=0 explicitly matches the default call.
    assert values.mean(axis=0) == target

def test_mean_2d(self):
    """Check ``mean`` over each axis, and over all elements, of a 3x2 array."""
    index = pd.timedelta_range("14 days", periods=6)
    grid = index._data.reshape(3, 2)

    # Reducing over axis 0 is expected to reproduce the middle row.
    col_means = grid.mean(axis=0)
    tm.assert_timedelta_array_equal(col_means, grid[1])

    # Reducing over axis 1 is expected to give the first column shifted
    # forward by twelve hours.
    row_means = grid.mean(axis=1)
    tm.assert_timedelta_array_equal(row_means, grid[:, 0] + pd.Timedelta(hours=12))

    # axis=None reduces over every element and must agree with the mean
    # of the original 1D index.
    overall = grid.mean(axis=None)
    assert overall == index.mean()