From bf23e99c36f37bcd0cabb923a3632f1737f217d6 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 20 Mar 2020 10:51:21 -0700
Subject: [PATCH 1/6] REF: DF._reduce do frame_apply early

---
 pandas/core/frame.py                 | 70 ++++++++++++++++------------
 pandas/tests/frame/test_analytics.py |  2 +-
 pandas/tests/frame/test_missing.py   |  2 +-
 3 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cd5d81bc70dd9..b8a2842c80af6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7856,54 +7856,66 @@ def blk_func(values):
             out.index = df.columns
             return out
 
+        if not self._is_homogeneous_type:
+            # try to avoid self.values call
+
+            # try by-column first
+            if filter_type is None and axis == 0 and len(self) > 0:
+                # numeric_only must be None here, as other cases caught above
+                # require len(self) > 0 bc frame_apply messes up empty prod/sum
+
+                # this can end up with a non-reduction, but not always.
+                # If the types are mixed with datelike, we need to make
+                # sure a Series is returned (see the ndim check below).
+
+                # we only end up here if numeric_only was not specified
+                # and the frame has mixed dtypes, so we attempt a
+                # column-by-column reduction with ignore_failures=True.
+                # So let's just do what we can
+                from pandas.core.apply import frame_apply
+
+                opa = frame_apply(
+                    self, func=f, result_type="expand", ignore_failures=True
+                )
+                result = opa.get_result()
+                if result.ndim == self.ndim:
+                    result = result.iloc[0].rename(None)
+                return result
+
+        data = self
         if numeric_only is None:
+
             values = self.values
             try:
                 result = f(values)
 
-                if filter_type == "bool" and is_object_dtype(values) and axis is None:
-                    # work around https://github.com/numpy/numpy/issues/10489
-                    # TODO: combine with hasattr(result, 'dtype') further down
-                    # hard since we don't have `values` down there.
-                    result = np.bool_(result)
             except TypeError:
                 # e.g. in nanops trying to convert strs to float
 
-                # try by-column first
-                if filter_type is None and axis == 0:
-                    # this can end up with a non-reduction
-                    # but not always. if the types are mixed
-                    # with datelike then need to make sure a series
-
-                    # we only end up here if we have not specified
-                    # numeric_only and yet we have tried a
-                    # column-by-column reduction, where we have mixed type.
-                    # So let's just do what we can
-                    from pandas.core.apply import frame_apply
-
-                    opa = frame_apply(
-                        self, func=f, result_type="expand", ignore_failures=True
-                    )
-                    result = opa.get_result()
-                    if result.ndim == self.ndim:
-                        result = result.iloc[0]
-                    return result
-
                 # TODO: why doesnt axis matter here?
                 data = _get_data(axis_matters=False)
-                with np.errstate(all="ignore"):
-                    result = f(data.values)
                 labels = data._get_agg_axis(axis)
+
+                values = data.values
+                with np.errstate(all="ignore"):
+                    result = f(values)
+
         else:
             if numeric_only:
                 data = _get_data(axis_matters=True)
+                labels = data._get_agg_axis(axis)
 
                 values = data.values
-                labels = data._get_agg_axis(axis)
             else:
                 values = self.values
             result = f(values)
 
+        if filter_type == "bool" and is_object_dtype(values.dtype) and axis is None:
+            # work around https://github.com/numpy/numpy/issues/10489
+            # TODO: combine with hasattr(result, 'dtype') further down
+            # hard since we don't have `values` down there.
+            result = np.bool_(result)
+
         if hasattr(result, "dtype") and is_object_dtype(result.dtype):
             try:
                 if filter_type is None or filter_type == "numeric":
@@ -7914,7 +7926,7 @@ def blk_func(values):
 
                 # try to coerce to the original dtypes item by item if we can
                 if axis == 0:
-                    result = coerce_to_dtypes(result, self.dtypes)
+                    result = coerce_to_dtypes(result, data.dtypes)
 
         if constructor is not None:
             result = self._constructor_sliced(result, index=labels)
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 07e30d41c216d..3a0f1b2a2ec17 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -344,7 +344,7 @@ def kurt(x):
             "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum
         )
         assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True)
-        assert_stat_op_calc("product", np.prod, float_frame_with_na)
+        assert_stat_op_calc("product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod)
 
         assert_stat_op_calc("mad", mad, float_frame_with_na)
         assert_stat_op_calc("var", var, float_frame_with_na)
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index 196df8ba00476..6cd9c0a7fde88 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -372,7 +372,7 @@ def test_fillna_categorical_nan(self):
         cat = Categorical([np.nan, 2, np.nan])
         val = Categorical([np.nan, np.nan, np.nan])
         df = DataFrame({"cats": cat, "vals": val})
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(None):
             res = df.fillna(df.median())
         v_exp = [np.nan, np.nan, np.nan]
         df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")

From d85805fdb9c06e266dd29d92a22b6a940345e792 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 20 Mar 2020 10:51:30 -0700
Subject: [PATCH 2/6] REF: DF._reduce do frame_apply early

---
 pandas/tests/frame/test_analytics.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 3a0f1b2a2ec17..c83e2dda20d0a 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -344,7 +344,9 @@ def kurt(x):
             "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum
         )
         assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True)
-        assert_stat_op_calc("product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod)
+        assert_stat_op_calc(
+            "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod
+        )
 
         assert_stat_op_calc("mad", mad, float_frame_with_na)
         assert_stat_op_calc("var", var, float_frame_with_na)

From f9235fca115772e2ace24cd621532e4a1a815a84 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 24 Mar 2020 13:34:08 -0700
Subject: [PATCH 3/6] move fillna call outside of context

---
 pandas/tests/frame/test_missing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index 749f4080582d3..10e0b52fb6221 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -373,7 +373,8 @@ def test_fillna_categorical_nan(self):
         val = Categorical([np.nan, np.nan, np.nan])
         df = DataFrame({"cats": cat, "vals": val})
         with tm.assert_produces_warning(None):
-            res = df.fillna(df.median())
+            median = df.median()
+        res = df.fillna(median)
         v_exp = [np.nan, np.nan, np.nan]
         df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")
         tm.assert_frame_equal(res, df_exp)

From c22dd07097cacfa7a2f4217e1a2796dc27a7c825 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 30 Mar 2020 11:08:39 -0700
Subject: [PATCH 4/6] debugging assertion

---
 pandas/tests/frame/test_missing.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index 10e0b52fb6221..4e4ab25031afc 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -374,6 +374,11 @@ def test_fillna_categorical_nan(self):
         df = DataFrame({"cats": cat, "vals": val})
         with tm.assert_produces_warning(None):
             median = df.median()
+
+        # GH#32950 check that we got the right expected median
+        exmed = Series({"cats": 2.0, "vals": np.nan})
+        tm.assert_series_equal(median, exmed)
+
         res = df.fillna(median)
         v_exp = [np.nan, np.nan, np.nan]
         df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")

From 2f8fc79db508c816cc6bfdf19f5b3cfdc838880a Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 30 Mar 2020 12:09:18 -0700
Subject: [PATCH 5/6] Troubleshoot CI

---
 pandas/core/frame.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 7c143dcf4b222..cf974a2fedab4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -77,6 +77,7 @@
     ensure_platform_int,
     infer_dtype_from_object,
     is_bool_dtype,
+    is_categorical_dtype,
     is_dataclass,
     is_datetime64_any_dtype,
     is_dict_like,
@@ -7998,8 +7999,15 @@ def blk_func(values):
         if not self._is_homogeneous_type:
             # try to avoid self.values call
 
-            # try by-column first
-            if filter_type is None and axis == 0 and len(self) > 0:
+            if self.dtypes.apply(is_categorical_dtype).any():
+                # GH#32950 Fall through to operating on self.values, since
+                #  operating column-wise will fail on Categorical.median
+                #  (TODO: only on some builds, not clear why)
+                pass
+
+            elif filter_type is None and axis == 0 and len(self) > 0:
+                # operate column-wise
+
                 # numeric_only must be None here, as other cases caught above
                 # require len(self) > 0 bc frame_apply messes up empty prod/sum
 

From 28fd5607ecf29b8317bc9d899364d4eb76e36173 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 30 Mar 2020 13:11:32 -0700
Subject: [PATCH 6/6] update test

---
 pandas/core/frame.py               | 9 +--------
 pandas/tests/frame/test_missing.py | 8 +++-----
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cf974a2fedab4..58494d2fcaa5f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -77,7 +77,6 @@
     ensure_platform_int,
     infer_dtype_from_object,
     is_bool_dtype,
-    is_categorical_dtype,
     is_dataclass,
     is_datetime64_any_dtype,
     is_dict_like,
@@ -7999,13 +7998,7 @@ def blk_func(values):
         if not self._is_homogeneous_type:
             # try to avoid self.values call
 
-            if self.dtypes.apply(is_categorical_dtype).any():
-                # GH#32950 Fall through to operating on self.values, since
-                #  operating column-wise will fail on Categorical.median
-                #  (TODO: only on some builds, not clear why)
-                pass
-
-            elif filter_type is None and axis == 0 and len(self) > 0:
+            if filter_type is None and axis == 0 and len(self) > 0:
                 # operate column-wise
 
                 # numeric_only must be None here, as other cases caught above
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index 4e4ab25031afc..7cb7115276f71 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -372,12 +372,10 @@ def test_fillna_categorical_nan(self):
         cat = Categorical([np.nan, 2, np.nan])
         val = Categorical([np.nan, np.nan, np.nan])
         df = DataFrame({"cats": cat, "vals": val})
-        with tm.assert_produces_warning(None):
-            median = df.median()
 
-        # GH#32950 check that we got the right expected median
-        exmed = Series({"cats": 2.0, "vals": np.nan})
-        tm.assert_series_equal(median, exmed)
+        # GH#32950 df.median() is poorly behaved because there is no
+        #  Categorical.median
+        median = Series({"cats": 2.0, "vals": np.nan})
 
         res = df.fillna(median)
         v_exp = [np.nan, np.nan, np.nan]