Skip to content

Commit a4d743e

Browse files
authored
ENH: support datetime64, datetime64tz in nanops.mean, nanops.median (pandas-dev#29941)
1 parent 97054ac commit a4d743e

File tree

5 files changed

+47
-15
lines changed

5 files changed

+47
-15
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ Backwards incompatible API changes
7474
Deprecations
7575
~~~~~~~~~~~~
7676
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated and will raise in a future version. Either convert the list to a tuple, or pass the slice directly instead (:issue:`31333`)
77+
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
7778
-
7879
-
7980

pandas/core/frame.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -7957,6 +7957,19 @@ def _count_level(self, level, axis=0, numeric_only=False):
79577957
def _reduce(
79587958
self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
79597959
):
7960+
7961+
dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M")
7962+
if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any():
7963+
warnings.warn(
7964+
"DataFrame.mean and DataFrame.median with numeric_only=None "
7965+
"will include datetime64 and datetime64tz columns in a "
7966+
"future version.",
7967+
FutureWarning,
7968+
stacklevel=3,
7969+
)
7970+
cols = self.columns[~dtype_is_dt]
7971+
self = self[cols]
7972+
79607973
if axis is None and filter_type == "bool":
79617974
labels = None
79627975
constructor = None
@@ -7996,9 +8009,15 @@ def _get_data(axis_matters):
79968009

79978010
out_dtype = "bool" if filter_type == "bool" else None
79988011

8012+
def blk_func(values):
8013+
if values.ndim == 1 and not isinstance(values, np.ndarray):
8014+
# we can't pass axis=1
8015+
return op(values, axis=0, skipna=skipna, **kwds)
8016+
return op(values, axis=1, skipna=skipna, **kwds)
8017+
79998018
# After possibly _get_data and transposing, we are now in the
80008019
# simple case where we can use BlockManager._reduce
8001-
res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
8020+
res = df._data.reduce(blk_func)
80028021
assert isinstance(res, dict)
80038022
if len(res):
80048023
assert len(res) == max(list(res.keys())) + 1, res.keys()

pandas/core/nanops.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
is_timedelta64_dtype,
3131
pandas_dtype,
3232
)
33-
from pandas.core.dtypes.dtypes import DatetimeTZDtype
3433
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
3534

3635
bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn")
@@ -516,7 +515,6 @@ def nansum(
516515
return _wrap_results(the_sum, dtype)
517516

518517

519-
@disallow("M8", DatetimeTZDtype)
520518
@bottleneck_switch()
521519
def nanmean(values, axis=None, skipna=True, mask=None):
522520
"""
@@ -574,7 +572,6 @@ def nanmean(values, axis=None, skipna=True, mask=None):
574572
return _wrap_results(the_mean, dtype)
575573

576574

577-
@disallow("M8")
578575
@bottleneck_switch()
579576
def nanmedian(values, axis=None, skipna=True, mask=None):
580577
"""
@@ -607,8 +604,12 @@ def get_median(x):
607604
return np.nanmedian(x[mask])
608605

609606
values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask)
610-
if not is_float_dtype(values):
611-
values = values.astype("f8")
607+
if not is_float_dtype(values.dtype):
608+
try:
609+
values = values.astype("f8")
610+
except ValueError:
611+
# e.g. "could not convert string to float: 'a'"
612+
raise TypeError
612613
if mask is not None:
613614
values[mask] = np.nan
614615

@@ -1355,7 +1356,11 @@ def _ensure_numeric(x):
13551356
try:
13561357
x = x.astype(np.complex128)
13571358
except (TypeError, ValueError):
1358-
x = x.astype(np.float64)
1359+
try:
1360+
x = x.astype(np.float64)
1361+
except ValueError:
1362+
# GH#29941 we get here with object arrays containing strs
1363+
raise TypeError(f"Could not convert {x} to numeric")
13591364
else:
13601365
if not np.any(np.imag(x)):
13611366
x = x.real

pandas/tests/frame/test_analytics.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,15 @@ def assert_stat_op_calc(
6363
f = getattr(frame, opname)
6464

6565
if check_dates:
66+
expected_warning = FutureWarning if opname in ["mean", "median"] else None
6667
df = DataFrame({"b": date_range("1/1/2001", periods=2)})
67-
result = getattr(df, opname)()
68+
with tm.assert_produces_warning(expected_warning):
69+
result = getattr(df, opname)()
6870
assert isinstance(result, Series)
6971

7072
df["a"] = range(len(df))
71-
result = getattr(df, opname)()
73+
with tm.assert_produces_warning(expected_warning):
74+
result = getattr(df, opname)()
7275
assert isinstance(result, Series)
7376
assert len(result)
7477

@@ -457,7 +460,8 @@ def test_nunique(self):
457460
def test_mean_mixed_datetime_numeric(self, tz):
458461
# https://github.com/pandas-dev/pandas/issues/24752
459462
df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2})
460-
result = df.mean()
463+
with tm.assert_produces_warning(FutureWarning):
464+
result = df.mean()
461465
expected = pd.Series([1.0], index=["A"])
462466
tm.assert_series_equal(result, expected)
463467

@@ -467,7 +471,9 @@ def test_mean_excludes_datetimes(self, tz):
467471
# Our long-term desired behavior is unclear, but the behavior in
468472
# 0.24.0rc1 was buggy.
469473
df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2})
470-
result = df.mean()
474+
with tm.assert_produces_warning(FutureWarning):
475+
result = df.mean()
476+
471477
expected = pd.Series(dtype=np.float64)
472478
tm.assert_series_equal(result, expected)
473479

@@ -863,7 +869,9 @@ def test_mean_datetimelike(self):
863869
expected = pd.Series({"A": 1.0})
864870
tm.assert_series_equal(result, expected)
865871

866-
result = df.mean()
872+
with tm.assert_produces_warning(FutureWarning):
873+
# in the future datetime columns will be included
874+
result = df.mean()
867875
expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]})
868876
tm.assert_series_equal(result, expected)
869877

pandas/tests/test_nanops.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -750,8 +750,8 @@ def test_ndarray(self):
750750

751751
# Test non-convertible string ndarray
752752
s_values = np.array(["foo", "bar", "baz"], dtype=object)
753-
msg = r"could not convert string to float: '(foo|baz)'"
754-
with pytest.raises(ValueError, match=msg):
753+
msg = r"Could not convert .* to numeric"
754+
with pytest.raises(TypeError, match=msg):
755755
nanops._ensure_numeric(s_values)
756756

757757
def test_convertable_values(self):
@@ -993,7 +993,6 @@ def prng(self):
993993

994994
class TestDatetime64NaNOps:
995995
@pytest.mark.parametrize("tz", [None, "UTC"])
996-
@pytest.mark.xfail(reason="disabled")
997996
# Enabling mean changes the behavior of DataFrame.mean
998997
# See https://github.com/pandas-dev/pandas/issues/24752
999998
def test_nanmean(self, tz):

0 commit comments

Comments
 (0)