From 822ce66299ef03fb1e2d62d4795174b63d37cc71 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 6 Feb 2023 15:20:57 -0800
Subject: [PATCH 1/2] API: don't infer dtype for object-dtype groupby reductions

---
 doc/source/whatsnew/v2.0.0.rst                 |  2 ++
 pandas/core/groupby/groupby.py                 |  6 ++++--
 .../tests/groupby/aggregate/test_aggregate.py  |  2 ++
 pandas/tests/groupby/aggregate/test_other.py   |  1 +
 pandas/tests/groupby/test_function.py          |  6 ++++++
 pandas/tests/groupby/test_groupby.py           |  4 +++-
 pandas/tests/groupby/test_min_max.py           | 18 ++++++++++++------
 7 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index df30a31889a99..9afc1bae6d6d4 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -760,6 +760,8 @@ Other API changes
 - The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
 - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
 - The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)
+- :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results, explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`??`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_200.deprecations:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5f5bb1c8833da..8158729e7f779 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1482,6 +1482,9 @@ def _agg_py_fallback(
             # TODO: if we ever get "rank" working, exclude it here.
             res_values = type(values)._from_sequence(res_values, dtype=values.dtype)
 
+        elif ser.dtype == object:
+            res_values = res_values.astype(object, copy=False)
+
         # If we are DataFrameGroupBy and went through a SeriesGroupByPath
         # then we need to reshape
         # GH#32223 includes case with IntegerArray values, ndarray res_values
@@ -1524,8 +1527,7 @@ def array_func(values: ArrayLike) -> ArrayLike:
         new_mgr = data.grouped_reduce(array_func)
         res = self._wrap_agged_manager(new_mgr)
         out = self._wrap_aggregated_output(res)
-        if data.ndim == 2:
-            # TODO: don't special-case DataFrame vs Series
+        if self.axis == 1:
             out = out.infer_objects(copy=False)
         return out
 
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index e7be78be55620..210dba8bbc44c 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -258,6 +258,7 @@ def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype
     expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype(
         result_dtype_dict
     )
+
     tm.assert_frame_equal(result, expected)
 
 
@@ -675,6 +676,7 @@ def test_agg_split_object_part_datetime():
             "F": [1],
         },
         index=np.array([0]),
+        dtype=object,
     )
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index eb667016b1e62..aad1218190a84 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -517,6 +517,7 @@ def test_sum_uint64_overflow():
     expected = DataFrame(
         {1: [9223372036854775809, 9223372036854775811, 9223372036854775813]},
         index=index,
+        dtype=object,
     )
 
     expected.index.name = 0
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 1fd61e6eb268e..b51fc31eaef6d 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1509,6 +1509,12 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
         "sum",
         "diff",
         "pct_change",
+        "var",
+        "mean",
+        "median",
+        "min",
+        "max",
+        "prod",
     )
 
     # Test default behavior; kernels that fail may be enabled in the future but kernels
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d7b015fa7104a..10118bb83a4a3 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2408,7 +2408,9 @@ def test_groupby_duplicate_columns():
     ).astype(object)
     df.columns = ["A", "B", "B"]
     result = df.groupby([0, 0, 0, 0]).min()
-    expected = DataFrame([["e", "a", 1]], index=np.array([0]), columns=["A", "B", "B"])
+    expected = DataFrame(
+        [["e", "a", 1]], index=np.array([0]), columns=["A", "B", "B"], dtype=object
+    )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py
index 2a997b3c84216..11f62c5d03c49 100644
--- a/pandas/tests/groupby/test_min_max.py
+++ b/pandas/tests/groupby/test_min_max.py
@@ -148,9 +148,13 @@ def test_aggregate_numeric_object_dtype():
         {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4},
     ).astype(object)
     result = df.groupby("key").min()
-    expected = DataFrame(
-        {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]}
-    ).set_index("key")
+    expected = (
+        DataFrame(
+            {"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]},
+        )
+        .set_index("key")
+        .astype(object)
+    )
     tm.assert_frame_equal(result, expected)
 
     # same but with numbers
@@ -158,9 +162,11 @@ def test_aggregate_numeric_object_dtype():
         {"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)},
     ).astype(object)
     result = df.groupby("key").min()
-    expected = DataFrame(
-        {"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]}
-    ).set_index("key")
+    expected = (
+        DataFrame({"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]})
+        .set_index("key")
+        .astype(object)
+    )
     tm.assert_frame_equal(result, expected)
 
 

From 730e10e8bb267abd15340642f885cb001d7e0b16 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 6 Feb 2023 15:22:02 -0800
Subject: [PATCH 2/2] GH ref

---
 doc/source/whatsnew/v2.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 9afc1bae6d6d4..2f1ec69d80773 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -760,7 +760,7 @@ Other API changes
 - The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
 - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
 - The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)
-- :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results, explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`??`)
+- :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results; explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`)
 -
 
 .. ---------------------------------------------------------------------------