diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 95ed60b075884..c1932d7e4b30d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -73,6 +73,7 @@ Backwards incompatible API changes Deprecations ~~~~~~~~~~~~ - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) +- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`) - - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e0efa93379bca..dd694ceaae0b4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7927,6 +7927,19 @@ def _count_level(self, level, axis=0, numeric_only=False): def _reduce( self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds ): + + dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M") + if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any(): + warnings.warn( + "DataFrame.mean and DataFrame.median with numeric_only=None " + "will include datetime64 and datetime64tz columns in a " + "future version.", + FutureWarning, + stacklevel=3, + ) + cols = self.columns[~dtype_is_dt] + self = self[cols] + if axis is None and filter_type == "bool": labels = None constructor = None @@ -7966,9 +7979,15 @@ def _get_data(axis_matters): out_dtype = "bool" if filter_type == "bool" else None + def blk_func(values): + if values.ndim == 1 and not isinstance(values, np.ndarray): + # we can't pass axis=1 + return op(values, axis=0, skipna=skipna, **kwds) + return op(values, axis=1, skipna=skipna, **kwds) + # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce - res = df._data.reduce(op, axis=1, skipna=skipna, **kwds) + res = df._data.reduce(blk_func) assert isinstance(res, dict) if len(res): assert len(res) == max(list(res.keys())) + 1, res.keys() diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 2bf2be082f639..b0dcb3b6e1f99 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -30,7 +30,6 @@ is_timedelta64_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") @@ -519,7 +518,6 @@ def nansum( return _wrap_results(the_sum, dtype) -@disallow("M8", DatetimeTZDtype) @bottleneck_switch() def nanmean(values, axis=None, skipna=True, mask=None): """ @@ -577,7 +575,6 @@ def nanmean(values, axis=None, skipna=True, mask=None): return _wrap_results(the_mean, dtype) -@disallow("M8") @bottleneck_switch() def nanmedian(values, axis=None, skipna=True, mask=None): """ @@ -610,8 +607,12 @@ def get_median(x): return np.nanmedian(x[mask]) values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask) - if not is_float_dtype(values): - values = values.astype("f8") + if not is_float_dtype(values.dtype): + try: + values = values.astype("f8") + except ValueError: + # e.g. "could not convert string to float: 'a'" + raise TypeError if mask is not None: values[mask] = np.nan @@ -1359,7 +1360,11 @@ def _ensure_numeric(x): try: x = x.astype(np.complex128) except (TypeError, ValueError): - x = x.astype(np.float64) + try: + x = x.astype(np.float64) + except ValueError: + # GH#29941 we get here with object arrays containing strs + raise TypeError(f"Could not convert {x} to numeric") else: if not np.any(np.imag(x)): x = x.real diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 25b2997eb088f..faae97472c0c9 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -64,12 +64,15 @@ def assert_stat_op_calc( f = getattr(frame, opname) if check_dates: + expected_warning = FutureWarning if opname in ["mean", "median"] else None df = DataFrame({"b": date_range("1/1/2001", periods=2)}) - result = getattr(df, opname)() + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() assert isinstance(result, Series) df["a"] = range(len(df)) - result = getattr(df, opname)() + with tm.assert_produces_warning(expected_warning): + result = getattr(df, opname)() assert isinstance(result, Series) assert len(result) @@ -460,7 +463,8 @@ def test_nunique(self): def test_mean_mixed_datetime_numeric(self, tz): # https://github.com/pandas-dev/pandas/issues/24752 df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2}) - result = df.mean() + with tm.assert_produces_warning(FutureWarning): + result = df.mean() expected = pd.Series([1.0], index=["A"]) tm.assert_series_equal(result, expected) @@ -470,7 +474,9 @@ def test_mean_excludes_datetimes(self, tz): # Our long-term desired behavior is unclear, but the behavior in # 0.24.0rc1 was buggy. df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2}) - result = df.mean() + with tm.assert_produces_warning(FutureWarning): + result = df.mean() + expected = pd.Series(dtype=np.float64) tm.assert_series_equal(result, expected) @@ -866,7 +872,9 @@ def test_mean_datetimelike(self): expected = pd.Series({"A": 1.0}) tm.assert_series_equal(result, expected) - result = df.mean() + with tm.assert_produces_warning(FutureWarning): + # in the future datetime columns will be included + result = df.mean() expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]}) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2c5d028ebe42e..f7e652eb78e2d 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -750,8 +750,8 @@ def test_ndarray(self): # Test non-convertible string ndarray s_values = np.array(["foo", "bar", "baz"], dtype=object) - msg = r"could not convert string to float: '(foo|baz)'" - with pytest.raises(ValueError, match=msg): + msg = r"Could not convert .* to numeric" + with pytest.raises(TypeError, match=msg): nanops._ensure_numeric(s_values) def test_convertable_values(self): @@ -993,7 +993,6 @@ def prng(self): class TestDatetime64NaNOps: @pytest.mark.parametrize("tz", [None, "UTC"]) - @pytest.mark.xfail(reason="disabled") # Enabling mean changes the behavior of DataFrame.mean # See https://github.com/pandas-dev/pandas/issues/24752 def test_nanmean(self, tz):