From 441502e637b538fdecf38948236e0dfd4941395d Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 30 Nov 2019 15:13:48 -0800
Subject: [PATCH 1/5] ENH: support datetime64, datetime64tz in nanops.mean,
 nanops.median

---
 pandas/core/frame.py                 | 17 ++++++++++++++++-
 pandas/core/nanops.py                | 11 ++++++-----
 pandas/tests/frame/test_analytics.py | 17 ++++++++++++-----
 pandas/tests/test_nanops.py          |  1 -
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5dfa7002abfca..4cb31df8ce670 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7575,6 +7575,19 @@ def _count_level(self, level, axis=0, numeric_only=False):
     def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
     ):
+
+        dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M")
+        if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any():
+            warnings.warn(
+                "DataFrame.mean and DataFrame.median with numeric_only=None "
+                "will include datetime64 and datetime64tz columns in a "
+                "future version.",
+                FutureWarning,
+                stacklevel=3,
+            )
+            cols = self.columns[~dtype_is_dt]
+            self = self[cols]
+
         if axis is None and filter_type == "bool":
             labels = None
             constructor = None
@@ -7614,8 +7627,10 @@ def _get_data(axis_matters):
                     # TODO: combine with hasattr(result, 'dtype') further down
                     # hard since we don't have `values` down there.
                     result = np.bool_(result)
-            except TypeError:
+            except (TypeError, ValueError):
                 # e.g. in nanops trying to convert strs to float
+                # TODO: the ValueError is raised in trying to convert str
+                #  to float, should we make that a TypError?
 
                 # try by-column first
                 if filter_type is None and axis == 0:
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index a2a40bbf93604..0b16c387e2462 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -29,7 +29,6 @@
     is_timedelta64_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
 
 bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn")
@@ -494,7 +493,6 @@ def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
     return _wrap_results(the_sum, dtype)
 
 
-@disallow("M8", DatetimeTZDtype)
 @bottleneck_switch()
 def nanmean(values, axis=None, skipna=True, mask=None):
     """
@@ -552,7 +550,6 @@ def nanmean(values, axis=None, skipna=True, mask=None):
     return _wrap_results(the_mean, dtype)
 
 
-@disallow("M8")
 @bottleneck_switch()
 def nanmedian(values, axis=None, skipna=True, mask=None):
     """
@@ -585,8 +582,12 @@ def get_median(x):
         return np.nanmedian(x[mask])
 
     values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask)
-    if not is_float_dtype(values):
-        values = values.astype("f8")
+    if not is_float_dtype(values.dtype):
+        try:
+            values = values.astype("f8")
+        except ValueError:
+            # e.g. "could not convert string to float: 'a'"
+            raise TypeError
         if mask is not None:
             values[mask] = np.nan
 
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 005ca8d95182e..16dfa4f144134 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -66,12 +66,15 @@ def assert_stat_op_calc(
     f = getattr(frame, opname)
 
     if check_dates:
+        expected_warning = FutureWarning if opname in ["mean", "median"] else None
         df = DataFrame({"b": date_range("1/1/2001", periods=2)})
-        result = getattr(df, opname)()
+        with tm.assert_produces_warning(expected_warning):
+            result = getattr(df, opname)()
         assert isinstance(result, Series)
 
         df["a"] = range(len(df))
-        result = getattr(df, opname)()
+        with tm.assert_produces_warning(expected_warning):
+            result = getattr(df, opname)()
         assert isinstance(result, Series)
         assert len(result)
 
@@ -1062,7 +1065,8 @@ def test_nunique(self):
     def test_mean_mixed_datetime_numeric(self, tz):
         # https://github.com/pandas-dev/pandas/issues/24752
         df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2})
-        result = df.mean()
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.mean()
         expected = pd.Series([1.0], index=["A"])
         tm.assert_series_equal(result, expected)
 
@@ -1072,7 +1076,8 @@ def test_mean_excludeds_datetimes(self, tz):
         # Our long-term desired behavior is unclear, but the behavior in
         # 0.24.0rc1 was buggy.
         df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2})
-        result = df.mean()
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.mean()
         expected = pd.Series()
         tm.assert_series_equal(result, expected)
 
@@ -1458,7 +1463,9 @@ def test_mean_datetimelike(self):
         expected = pd.Series({"A": 1.0})
         tm.assert_series_equal(result, expected)
 
-        result = df.mean()
+        with tm.assert_produces_warning(FutureWarning):
+            # in the future datetime columns will be included
+            result = df.mean()
         expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]})
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index e5d963a307502..dd225949fdc57 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -985,7 +985,6 @@ def prng(self):
 
 class TestDatetime64NaNOps:
     @pytest.mark.parametrize("tz", [None, "UTC"])
-    @pytest.mark.xfail(reason="disabled")
     # Enabling mean changes the behavior of DataFrame.mean
     # See https://github.com/pandas-dev/pandas/issues/24752
     def test_nanmean(self, tz):

From cb77a0d8f9e55d31a98d890e9d30f8b385d3c484 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 25 Dec 2019 16:46:35 -0800
Subject: [PATCH 2/5] re-raise ValueError as TypeError in nanops._ensure_numeric

---
 pandas/core/frame.py  | 4 +---
 pandas/core/nanops.py | 6 +++++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a667901111fc6..0c1536185d76a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7609,10 +7609,8 @@ def _get_data(axis_matters):
                     # TODO: combine with hasattr(result, 'dtype') further down
                     # hard since we don't have `values` down there.
                     result = np.bool_(result)
-            except (TypeError, ValueError):
+            except TypeError:
                 # e.g. in nanops trying to convert strs to float
-                # TODO: the ValueError is raised in trying to convert str
-                #  to float, should we make that a TypError?
 
                 # try by-column first
                 if filter_type is None and axis == 0:
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 593ede8daaf0a..facf8734ae924 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1289,7 +1289,11 @@ def _ensure_numeric(x):
             try:
                 x = x.astype(np.complex128)
             except (TypeError, ValueError):
-                x = x.astype(np.float64)
+                try:
+                    x = x.astype(np.float64)
+                except ValueError:
+                    # GH#29941 we get here with object arrays containing strs
+                    raise TypeError(f"Could not convert {x} to numeric")
             else:
                 if not np.any(np.imag(x)):
                     x = x.real

From 406de1ba93ebb2c8f97893754230dc590bcc93fe Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 25 Dec 2019 17:30:45 -0800
Subject: [PATCH 3/5] update test_ndarray to expect TypeError from _ensure_numeric

---
 pandas/tests/test_nanops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index 324c1a8aed2c8..575c351026b4a 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -742,8 +742,8 @@ def test_ndarray(self):
 
         # Test non-convertible string ndarray
         s_values = np.array(["foo", "bar", "baz"], dtype=object)
-        msg = r"could not convert string to float: '(foo|baz)'"
-        with pytest.raises(ValueError, match=msg):
+        msg = r"Could not convert .* to numeric"
+        with pytest.raises(TypeError, match=msg):
             nanops._ensure_numeric(s_values)
 
     def test_convertable_values(self):

From 1eafb5ebea7fc4d2517cc81004767eceb7664f91 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sun, 26 Jan 2020 10:57:56 -0800
Subject: [PATCH 4/5] whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 920919755dc23..be8228fe90b06 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -68,7 +68,7 @@ Backwards incompatible API changes
 
 Deprecations
 ~~~~~~~~~~~~
-
+- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
 -
 -
 

From 385ae34699174fc7b54ebe7545d381876cc53573 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 27 Jan 2020 15:49:48 -0800
Subject: [PATCH 5/5] compat for CI failures: reduce 1-D non-ndarray values with axis=0

---
 pandas/core/frame.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 834bff6d2f314..3df646206821a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7973,9 +7973,15 @@ def _get_data(axis_matters):
 
             out_dtype = "bool" if filter_type == "bool" else None
 
+            def blk_func(values):
+                if values.ndim == 1 and not isinstance(values, np.ndarray):
+                    # we can't pass axis=1
+                    return op(values, axis=0, skipna=skipna, **kwds)
+                return op(values, axis=1, skipna=skipna, **kwds)
+
             # After possibly _get_data and transposing, we are now in the
             #  simple case where we can use BlockManager._reduce
-            res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
+            res = df._data.reduce(blk_func)
             assert isinstance(res, dict)
             if len(res):
                 assert len(res) == max(list(res.keys())) + 1, res.keys()