From 898759d10cf808e7be69bf6b30eaba395b3df4fa Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Thu, 10 Nov 2022 08:53:11 -0500
Subject: [PATCH 1/6] BUG: DataFrame reductions with object dtype and axis=1

---
 doc/source/whatsnew/v2.0.0.rst        |  2 +-
 pandas/core/frame.py                  | 83 +++++++++++----------------
 pandas/tests/frame/test_reductions.py | 29 ++++++++++
 3 files changed, 63 insertions(+), 51 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 73a75667b46da..ca1d708786f6f 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -571,7 +571,7 @@ Timezones
 Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`)
-- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`)
+- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would have results unnecessarily coerced to float; coercion still occurs for reductions that necessarily result in floats (``mean``, ``var``, ``std``, ``skew``) (:issue:`49603`)
 -
 
 Conversion
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1627a7add25ed..fe51d7b24cc50 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -134,7 +134,6 @@
     is_iterator,
     is_list_like,
     is_numeric_dtype,
-    is_object_dtype,
     is_scalar,
     is_sequence,
     needs_i8_conversion,
@@ -10522,57 +10521,41 @@ def _get_data() -> DataFrame:
                 data = self._get_bool_data()
             return data
 
-        if numeric_only or axis == 0:
-            # For numeric_only non-None and axis non-None, we know
-            #  which blocks to use and no try/except is needed.
-            #  For numeric_only=None only the case with axis==0 and no object
-            #  dtypes are unambiguous can be handled with BlockManager.reduce
-            # Case with EAs see GH#35881
-            df = self
-            if numeric_only:
-                df = _get_data()
-            if axis == 1:
-                df = df.T
-                axis = 0
-
-            # After possibly _get_data and transposing, we are now in the
-            #  simple case where we can use BlockManager.reduce
-            res, _ = df._mgr.reduce(blk_func, ignore_failures=False)
-            out = df._constructor(res).iloc[0]
-            if out_dtype is not None:
-                out = out.astype(out_dtype)
-            if axis == 0 and len(self) == 0 and name in ["sum", "prod"]:
-                # Even if we are object dtype, follow numpy and return
-                #  float64, see test_apply_funcs_over_empty
-                out = out.astype(np.float64)
-
-            if numeric_only is None and out.shape[0] != df.shape[1]:
-                # columns have been dropped GH#41480
-                com.deprecate_numeric_only_default(
-                    type(self), name, deprecate_none=True
-                )
-
-            return out
-
-        assert not numeric_only and axis == 1
-
-        data = self
-        values = data.values
-        result = func(values)
+        # Case with EAs see GH#35881
+        df = self
+        if numeric_only:
+            df = _get_data()
+        if axis == 1:
+            if len(df.index) == 0:
+                # Taking a transpose would result in no columns, losing the dtype.
+                # In the empty case, reducing along axis 0 or 1 gives the same
+                # result dtype, so reduce with axis=0 and ignore values
+                result = df._reduce(
+                    op,
+                    name,
+                    axis=0,
+                    skipna=skipna,
+                    numeric_only=False,
+                    filter_type=filter_type,
+                    **kwds,
+                ).iloc[:0]
+                result.index = df.index
+                return result
+            df = df.T
+            axis = 0
 
-        if hasattr(result, "dtype"):
-            if filter_type == "bool" and notna(result).all():
-                result = result.astype(np.bool_)
-            elif filter_type is None and is_object_dtype(result.dtype):
-                try:
-                    result = result.astype(np.float64)
-                except (ValueError, TypeError):
-                    # try to coerce to the original dtypes item by item if we can
-                    pass
+        # After possibly _get_data and transposing, we are now in the
+        #  simple case where we can use BlockManager.reduce
+        res, _ = df._mgr.reduce(blk_func, ignore_failures=False)
+        out = df._constructor(res).iloc[0]
+        if out_dtype is not None:
+            out = out.astype(out_dtype)
+        if axis == 0 and len(self) == 0 and name in ["sum", "prod"]:
+            # Even if we are object dtype, follow numpy and return
+            #  float64, see test_apply_funcs_over_empty
+            out = out.astype(np.float64)
 
-        labels = self._get_agg_axis(axis)
-        result = self._constructor_sliced(result, index=labels)
-        return result
+        return out
 
     def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
         """
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 0e5c6057b9a61..4262fde36c373 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -331,6 +331,8 @@ def test_stat_operators_attempt_obj_array(self, method, df):
         assert df.values.dtype == np.object_
         result = getattr(df, method)(1)
         expected = getattr(df.astype("f8"), method)(1)
+        if method in ("sum", "prod", "min", "max"):
+            expected = expected.astype(object)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"])
@@ -718,6 +720,33 @@ def test_sum_corner(self):
         assert len(axis0) == 0
         assert len(axis1) == 0
 
+    @pytest.mark.parametrize(
+        "index",
+        [
+            tm.makeRangeIndex(0),
+            tm.makeDateIndex(0),
+            tm.makeNumericIndex(0, dtype=int),
+            tm.makeNumericIndex(0, dtype=float),
+            tm.makeDateIndex(0, freq="M"),
+            tm.makePeriodIndex(0),
+        ],
+    )
+    def test_axis_1_empty(self, reduction_functions, index):
+        df = DataFrame(columns=["a"], index=index)
+        result = getattr(df, reduction_functions)(axis=1)
+        expected_dtype = {
+            "any": "bool",
+            "all": "bool",
+            "count": "int",
+            "sum": "float",
+            "prod": "float",
+            "skew": "float",
+            "kurt": "float",
+            "sem": "float",
+        }.get(reduction_functions, "object")
+        expected = Series([], index=index, dtype=expected_dtype)
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)])
     @pytest.mark.parametrize("numeric_only", [None, True, False])
     def test_sum_prod_nanops(self, method, unit, numeric_only):

From cd696aefd2e85653d2c2afb8e70a9f6ea36569de Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 12 Nov 2022 08:00:13 -0500
Subject: [PATCH 2/6] Use intp

---
 pandas/tests/frame/test_reductions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 4262fde36c373..df536e409f510 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -737,7 +737,7 @@ def test_axis_1_empty(self, reduction_functions, index):
         expected_dtype = {
             "any": "bool",
             "all": "bool",
-            "count": "int",
+            "count": "intp",
             "sum": "float",
             "prod": "float",
             "skew": "float",

From 9de5b27387e39bbfc245dcaaf9e0c58550ca73cb Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Wed, 23 Nov 2022 07:40:56 -0500
Subject: [PATCH 3/6] Merge cleanup

---
 pandas/core/frame.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f20fe62258597..272f7c3cdc156 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -132,6 +132,7 @@
     is_integer_dtype,
     is_iterator,
     is_list_like,
+    is_object_dtype,
     is_scalar,
     is_sequence,
     needs_i8_conversion,

From a37377557abcdaac07ebbc5d0927fc06d9eab41c Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Fri, 2 Dec 2022 11:17:54 -0500
Subject: [PATCH 4/6] Update fixture name

---
 pandas/tests/frame/test_reductions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 27349e0bc74e9..64a08b4e5bac4 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -713,9 +713,9 @@ def test_sum_corner(self):
             tm.makePeriodIndex(0),
         ],
     )
-    def test_axis_1_empty(self, reduction_functions, index):
+    def test_axis_1_empty(self, all_reductions, index):
         df = DataFrame(columns=["a"], index=index)
-        result = getattr(df, reduction_functions)(axis=1)
+        result = getattr(df, all_reductions)(axis=1)
         expected_dtype = {
             "any": "bool",
             "all": "bool",
@@ -725,7 +725,7 @@ def test_axis_1_empty(self, reduction_functions, index):
             "skew": "float",
             "kurt": "float",
             "sem": "float",
-        }.get(reduction_functions, "object")
+        }.get(all_reductions, "object")
         expected = Series([], index=index, dtype=expected_dtype)
         tm.assert_series_equal(result, expected)
 

From 10568da5945c3caad5a63beef44496912d648744 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 6 Dec 2022 16:22:12 -0500
Subject: [PATCH 5/6] WIP

---
 pandas/core/frame.py                  |  2 +-
 pandas/core/nanops.py                 | 12 +++++++++---
 pandas/tests/frame/test_reductions.py | 25 +++++++++++++++++++++----
 3 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2631150882284..ba3b6e301fd92 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -10547,7 +10547,7 @@ def _get_data() -> DataFrame:
         out = df._constructor(res).iloc[0]
         if out_dtype is not None:
             out = out.astype(out_dtype)
-        if axis == 0 and len(self) == 0 and name in ["sum", "prod"]:
+        if len(self) == 0 and name in ["sum", "prod"]:
             # Even if we are object dtype, follow numpy and return
             #  float64, see test_apply_funcs_over_empty
             out = out.astype(np.float64)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index b8e2b1fafe326..744cb1eae00d3 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -622,7 +622,7 @@ def nansum(
     3.0
     """
     values, mask, dtype, dtype_max, _ = _get_values(
-        values, skipna, fill_value=0, mask=mask
+        values, skipna, fill_value=0.0, mask=mask
     )
     dtype_sum = dtype_max
     if is_float_dtype(dtype):
@@ -1389,7 +1389,7 @@ def nanprod(
 
     if skipna and mask is not None:
         values = values.copy()
-        values[mask] = 1
+        values[mask] = 1.0
     result = values.prod(axis)
     # error: Incompatible return value type (got "Union[ndarray, float]", expected
     # "float")
@@ -1500,7 +1500,13 @@ def _maybe_null_out(
                 result[null_mask] = None
     elif result is not NaT:
         if check_below_min_count(shape, mask, min_count):
-            result = np.nan
+            result_dtype = getattr(result, "dtype", None)
+            if is_float_dtype(result_dtype):
+                # Preserve dtype when possible
+
+                result = np.array([np.nan], dtype=f"f{result_dtype.itemsize}")[0]
+            else:
+                result = np.nan
 
     return result
 
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 64a08b4e5bac4..ce0eccd0fdec2 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -323,12 +323,12 @@ def wrapper(x):
             DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
         ],
     )
-    def test_stat_operators_attempt_obj_array(self, method, df):
+    def test_stat_operators_attempt_obj_array(self, method, df, using_array_manager):
         # GH#676
         assert df.values.dtype == np.object_
         result = getattr(df, method)(1)
         expected = getattr(df.astype("f8"), method)(1)
-        if method in ("sum", "prod", "min", "max"):
+        if not using_array_manager and method in ("sum", "prod", "min", "max"):
             expected = expected.astype(object)
         tm.assert_series_equal(result, expected)
 
@@ -713,7 +713,7 @@ def test_sum_corner(self):
             tm.makePeriodIndex(0),
         ],
     )
-    def test_axis_1_empty(self, all_reductions, index):
+    def test_axis_1_empty(self, all_reductions, index, using_array_manager):
         df = DataFrame(columns=["a"], index=index)
         result = getattr(df, all_reductions)(axis=1)
         expected_dtype = {
@@ -726,6 +726,15 @@ def test_axis_1_empty(self, all_reductions, index):
             "kurt": "float",
             "sem": "float",
         }.get(all_reductions, "object")
+        if using_array_manager and all_reductions in (
+            "max",
+            "min",
+            "mean",
+            "std",
+            "var",
+            "median",
+        ):
+            expected_dtype = "float"
         expected = Series([], index=index, dtype=expected_dtype)
         tm.assert_series_equal(result, expected)
 
@@ -1368,7 +1377,9 @@ def test_min_max_dt64_with_NaT(self):
         exp = Series([pd.NaT], index=["foo"])
         tm.assert_series_equal(res, exp)
 
-    def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture):
+    def test_min_max_dt64_with_NaT_skipna_false(
+        self, request, tz_naive_fixture, using_array_manager
+    ):
         # GH#36907
         tz = tz_naive_fixture
         if isinstance(tz, tzlocal) and is_platform_windows():
@@ -1390,12 +1401,18 @@ def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture):
         expected = Series([df.loc[0, "a"], pd.NaT])
         assert expected.dtype == df["a"].dtype
 
+        if using_array_manager and tz is not None:
+            expected = expected.astype(object)
+
         tm.assert_series_equal(res, expected)
 
         res = df.max(axis=1, skipna=False)
         expected = Series([df.loc[0, "b"], pd.NaT])
         assert expected.dtype == df["a"].dtype
 
+        if using_array_manager and tz is not None:
+            expected = expected.astype(object)
+
         tm.assert_series_equal(res, expected)
 
     def test_min_max_dt64_api_consistency_with_NaT(self):

From d0307d9c1135284bb0e6d9689514102842cdd534 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sat, 10 Dec 2022 13:42:48 -0500
Subject: [PATCH 6/6] Fixups

---
 pandas/core/frame.py                   |  5 -----
 pandas/core/internals/array_manager.py |  3 ++-
 pandas/tests/frame/test_reductions.py  | 31 ++++++++++++++------------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index dafe09c487d26..5154a777789c6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -10492,10 +10492,6 @@ def _reduce(
         axis = self._get_axis_number(axis)
         assert axis in [0, 1]
 
-        def func(values: np.ndarray):
-            # We only use this in the case that operates on self.values
-            return op(values, axis=axis, skipna=skipna, **kwds)
-
         def blk_func(values, axis: Axis = 1):
             if isinstance(values, ExtensionArray):
                 if not is_1d_only_ea_dtype(values.dtype) and not isinstance(
@@ -10536,7 +10532,6 @@ def _get_data() -> DataFrame:
                 result.index = df.index
                 return result
             df = df.T
-            axis = 0
 
         # After possibly _get_data and transposing, we are now in the
         #  simple case where we can use BlockManager.reduce
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 2fffd8ebe2f72..6ef4fe2b5267d 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -986,8 +986,9 @@ def reduce(self: T, func: Callable) -> T:
             else:
                 # error: Argument 1 to "append" of "list" has incompatible type
                 # "ExtensionArray"; expected "ndarray"
+                dtype = arr.dtype if res is NaT else None
                 result_arrays.append(
-                    sanitize_array([res], None)  # type: ignore[arg-type]
+                    sanitize_array([res], None, dtype=dtype)  # type: ignore[arg-type]
                 )
 
         index = Index._simple_new(np.array([None], dtype=object))  # placeholder
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index e1efd9f90cf69..5b96b4cd32864 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -323,13 +323,26 @@ def wrapper(x):
             DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
         ],
     )
-    def test_stat_operators_attempt_obj_array(self, method, df, using_array_manager):
+    def test_stat_operators_attempt_obj_array(
+        self, method, df, using_array_manager, axis
+    ):
         # GH#676
         assert df.values.dtype == np.object_
-        result = getattr(df, method)(1)
-        expected = getattr(df.astype("f8"), method)(1)
+        result = getattr(df, method)(axis=axis)
+        expected = getattr(df.astype("f8"), method)(axis=axis)
+        # With values an np.array with dtype object:
+        #   - When using blocks, `values.sum(axis=1, ...)` returns a np.array of dim 1
+        #     and this remains object dtype
+        #   - When using arrays, `values.sum(axis=0, ...)` returns a Python float
         if not using_array_manager and method in ("sum", "prod", "min", "max"):
             expected = expected.astype(object)
+        elif (
+            using_array_manager
+            and axis in (0, "index")
+            and method in ("min", "max")
+            and 0 in df.columns
+        ):
+            expected = expected.astype(int)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"])
@@ -1377,9 +1390,7 @@ def test_min_max_dt64_with_NaT(self):
         exp = Series([pd.NaT], index=["foo"])
         tm.assert_series_equal(res, exp)
 
-    def test_min_max_dt64_with_NaT_skipna_false(
-        self, request, tz_naive_fixture, using_array_manager
-    ):
+    def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture):
         # GH#36907
         tz = tz_naive_fixture
         if isinstance(tz, tzlocal) and is_platform_windows():
@@ -1400,19 +1411,11 @@ def test_min_max_dt64_with_NaT_skipna_false(
         res = df.min(axis=1, skipna=False)
         expected = Series([df.loc[0, "a"], pd.NaT])
         assert expected.dtype == df["a"].dtype
-
-        if using_array_manager and tz is not None:
-            expected = expected.astype(object)
-
         tm.assert_series_equal(res, expected)
 
         res = df.max(axis=1, skipna=False)
         expected = Series([df.loc[0, "b"], pd.NaT])
         assert expected.dtype == df["a"].dtype
-
-        if using_array_manager and tz is not None:
-            expected = expected.astype(object)
-
         tm.assert_series_equal(res, expected)
 
     def test_min_max_dt64_api_consistency_with_NaT(self):