From 34b36fb8b8aeba8a100b5df2d7592fc14b827d65 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 6 Aug 2024 14:24:00 -0700
Subject: [PATCH 01/12] TST: fix groupby xfails with using_infer_string

---
 pandas/tests/groupby/test_raises.py | 60 ++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index f28967fa81ddb..7c469fd57f567 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     Categorical,
     DataFrame,
@@ -106,10 +104,9 @@ def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""):
                     gb.transform(groupby_func, *args)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
 def test_groupby_raises_string(
-    how, by, groupby_series, groupby_func, df_with_string_col
+    how, by, groupby_series, groupby_func, df_with_string_col, using_infer_string
 ):
     df = df_with_string_col
     args = get_groupby_method_args(groupby_func, df)
@@ -183,6 +180,44 @@ def test_groupby_raises_string(
         ),
     }[groupby_func]
 
+    if using_infer_string:
+        if klass is not None:
+            if re.escape("agg function failed") in msg:
+                msg = msg.replace("object", "string")
+            elif groupby_func in [
+                "cumsum",
+                "cumprod",
+                "cummin",
+                "cummax",
+                "std",
+                "sem",
+                "skew",
+            ]:
+                msg = msg.replace("object", "string")
+            elif groupby_func == "quantile":
+                msg = "No matching signature found"
+            elif groupby_func == "corrwith":
+                msg = (
+                    "'ArrowStringArrayNumpySemantics' with dtype string does "
+                    "not support operation 'mean'"
+                )
+            else:
+                import pyarrow as pa
+
+                klass = pa.lib.ArrowNotImplementedError
+                if groupby_func == "pct_change":
+                    msg = "Function 'divide' has no kernel matching input types"
+                elif groupby_func == "diff":
+                    msg = (
+                        "Function 'subtract_checked' has no kernel matching "
+                        "input types"
+                    )
+                else:
+                    msg = (
+                        f"Function '{groupby_func}' has no kernel matching "
+                        "input types"
+                    )
+
     if groupby_func == "fillna":
         kind = "Series" if groupby_series else "DataFrame"
         warn_msg = f"{kind}GroupBy.fillna is deprecated"
@@ -208,11 +243,15 @@ def func(x):
         getattr(gb, how)(func)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
 @pytest.mark.parametrize("how", ["agg", "transform"])
 @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
 def test_groupby_raises_string_np(
-    how, by, groupby_series, groupby_func_np, df_with_string_col
+    how,
+    by,
+    groupby_series,
+    groupby_func_np,
+    df_with_string_col,
+    using_infer_string,
 ):
     # GH#50749
     df = df_with_string_col
@@ -228,6 +267,15 @@ def test_groupby_raises_string_np(
             "Could not convert string .* to numeric",
         ),
     }[groupby_func_np]
+
+    if using_infer_string:
+        # TODO: should ArrowStringArrayNumpySemantics support sum?
+        klass = TypeError
+        msg = (
+            "'ArrowStringArrayNumpySemantics' with dtype string does not "
+            f"support operation '{groupby_func_np.__name__}'"
+        )
+
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 

From 91278297313b496535ed9dee8871eab1318d9691 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 13 Aug 2024 14:20:55 -0700
Subject: [PATCH 02/12] TST: update _groupby_op to raise TypeError for unsupported StringDtype ops

---
 pandas/core/arrays/arrow/array.py    | 13 +++++
 pandas/core/arrays/base.py           | 15 ++++++
 pandas/core/groupby/groupby.py       |  4 ++
 pandas/tests/groupby/test_groupby.py | 14 +++--
 pandas/tests/groupby/test_raises.py  | 79 +++++++++++++++-------------
 5 files changed, 84 insertions(+), 41 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index a374afcacc45a..27034ce63ff53 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2270,6 +2270,19 @@ def _groupby_op(
         **kwargs,
     ):
         if isinstance(self.dtype, StringDtype):
+            if how in [
+                "sum",
+                "prod",
+                "mean",
+                "median",
+                "cumsum",
+                "cumprod",
+                "std",
+                "sem",
+                "var",
+                "skew",
+            ]:
+                raise TypeError(f"{self.dtype} dtype does not support {how} operations")
             return super()._groupby_op(
                 how=how,
                 has_dropped_na=has_dropped_na,
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 2124f86b03b9c..6259de397b170 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2535,6 +2535,21 @@ def _groupby_op(
         # GH#43682
         if isinstance(self.dtype, StringDtype):
             # StringArray
+            if op.how in [
+                "sum",
+                "prod",
+                "mean",
+                "median",
+                "cumsum",
+                "cumprod",
+                "std",
+                "sem",
+                "var",
+                "skew",
+            ]:
+                raise TypeError(
+                    f"{self.dtype} dtype does not support {op.how} operations"
+                )
             if op.how not in ["any", "all"]:
                 # Fail early to avoid conversion to object
                 op._get_cython_function(op.kind, op.how, np.dtype(object), False)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8c9c92594ebe7..8f45e110594b1 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4287,6 +4287,10 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
                 raise TypeError(
                     "'quantile' cannot be performed against 'object' dtypes!"
                 )
+            elif isinstance(vals.dtype, StringDtype):
+                raise TypeError(
+                    f"{vals.dtype} dtype does not support quantile operations"
+                )
 
             inference: DtypeObj | None = None
             if isinstance(vals, BaseMaskedArray) and is_numeric_dtype(vals.dtype):
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 11b874d0b1608..c5158c56622a7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -702,15 +702,20 @@ def test_keep_nuisance_agg(df, agg_function):
     ["sum", "mean", "prod", "std", "var", "sem", "median"],
 )
 @pytest.mark.parametrize("numeric_only", [True, False])
-def test_omit_nuisance_agg(df, agg_function, numeric_only):
+def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
     # GH 38774, GH 38815
     grouped = df.groupby("A")
 
     no_drop_nuisance = ("var", "std", "sem", "mean", "prod", "median")
+    if using_infer_string:
+        no_drop_nuisance += ("sum",)
     if agg_function in no_drop_nuisance and not numeric_only:
         # Added numeric_only as part of GH#46560; these do not drop nuisance
         # columns when numeric_only is False
-        if agg_function in ("std", "sem"):
+        if using_infer_string:
+            msg = f"str dtype does not support {agg_function} operations"
+            klass = TypeError
+        elif agg_function in ("std", "sem"):
             klass = ValueError
             msg = "could not convert string to float: 'one'"
         else:
@@ -1772,6 +1777,7 @@ def get_categorical_invalid_expected():
     is_per = isinstance(df.dtypes.iloc[0], pd.PeriodDtype)
     is_dt64 = df.dtypes.iloc[0].kind == "M"
     is_cat = isinstance(values, Categorical)
+    is_str = isinstance(df.dtypes.iloc[0], pd.StringDtype)
 
     if (
         isinstance(values, Categorical)
@@ -1796,13 +1802,15 @@ def get_categorical_invalid_expected():
 
     if op in ["prod", "sum", "skew"]:
         # ops that require more than just ordered-ness
-        if is_dt64 or is_cat or is_per:
+        if is_dt64 or is_cat or is_per or is_str:
             # GH#41291
             # datetime64 -> prod and sum are invalid
             if is_dt64:
                 msg = "datetime64 type does not support"
             elif is_per:
                 msg = "Period type does not support"
+            elif is_str:
+                msg = "str dtype does not support"
             else:
                 msg = "category type does not support"
             if op == "skew":
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 7c469fd57f567..6925e0158fbea 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -181,42 +181,46 @@ def test_groupby_raises_string(
     }[groupby_func]
 
     if using_infer_string:
-        if klass is not None:
-            if re.escape("agg function failed") in msg:
-                msg = msg.replace("object", "string")
-            elif groupby_func in [
-                "cumsum",
-                "cumprod",
-                "cummin",
-                "cummax",
-                "std",
-                "sem",
-                "skew",
-            ]:
-                msg = msg.replace("object", "string")
-            elif groupby_func == "quantile":
-                msg = "No matching signature found"
-            elif groupby_func == "corrwith":
-                msg = (
-                    "'ArrowStringArrayNumpySemantics' with dtype string does "
-                    "not support operation 'mean'"
-                )
-            else:
-                import pyarrow as pa
-
-                klass = pa.lib.ArrowNotImplementedError
-                if groupby_func == "pct_change":
-                    msg = "Function 'divide' has no kernel matching input types"
-                elif groupby_func == "diff":
-                    msg = (
-                        "Function 'subtract_checked' has no kernel matching "
-                        "input types"
-                    )
-                else:
-                    msg = (
-                        f"Function '{groupby_func}' has no kernel matching "
-                        "input types"
-                    )
+        if groupby_func in [
+            "sum",
+            "prod",
+            "mean",
+            "median",
+            "cumsum",
+            "cumprod",
+            "std",
+            "sem",
+            "var",
+            "skew",
+            "quantile",
+        ]:
+            msg = f"str dtype does not support {groupby_func} operations"
+            if groupby_func == "sum":
+                # The object-dtype allows this, StringArray variants do not.
+                klass = TypeError
+            elif groupby_func in ["sem", "std", "skew"]:
+                # The object-dtype raises ValueError when trying to convert to numeric.
+                klass = TypeError
+        elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow":
+            # This doesn't go through EA._groupby_op so the message isn't controlled
+            #  there.
+            import pyarrow as pa
+
+            klass = pa.lib.ArrowNotImplementedError
+            msg = "Function 'divide' has no kernel matching input types"
+        elif groupby_func == "diff" and df["d"].dtype.storage == "pyarrow":
+            # This doesn't go through EA._groupby_op so the message isn't controlled
+            #  there.
+            import pyarrow as pa
+
+            klass = pa.lib.ArrowNotImplementedError
+            msg = "Function 'subtract_checked' has no kernel matching input types"
+        elif groupby_func in ["cummin", "cummax"]:
+            msg = msg.replace("object", "str")
+        elif groupby_func == "corrwith":
+            msg = (
+                "'.*NumpySemantics' with dtype str does " "not support operation 'mean'"
+            )
 
     if groupby_func == "fillna":
         kind = "Series" if groupby_series else "DataFrame"
@@ -269,10 +273,9 @@ def test_groupby_raises_string_np(
     }[groupby_func_np]
 
     if using_infer_string:
-        # TODO: should ArrowStringArrayNumpySemantics support sum?
         klass = TypeError
         msg = (
-            "'ArrowStringArrayNumpySemantics' with dtype string does not "
+            "'.*StringArrayNumpySemantics' with dtype str does not "
             f"support operation '{groupby_func_np.__name__}'"
         )
 

From e7ae735e65b3ee55ee41cbc75ea3133a49364a8f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 14 Aug 2024 14:12:58 -0700
Subject: [PATCH 03/12] TST: update expected error messages for str dtype under using_infer_string

---
 pandas/tests/frame/test_stack_unstack.py      |  4 ++-
 pandas/tests/groupby/test_groupby.py          | 18 +++++++++---
 pandas/tests/groupby/test_groupby_subclass.py |  2 +-
 pandas/tests/groupby/test_numeric_only.py     |  4 +--
 pandas/tests/groupby/test_raises.py           | 11 ++++---
 pandas/tests/resample/test_resample_api.py    | 29 +++++++++++++++++--
 pandas/tests/reshape/merge/test_join.py       |  4 ++-
 pandas/tests/reshape/test_pivot.py            | 10 +++++--
 8 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index b4f02b6f81b6f..c9ddb7bf60085 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -2113,7 +2113,7 @@ def test_unstack_period_frame(self):
     @pytest.mark.filterwarnings(
         "ignore:The previous implementation of stack is deprecated"
     )
-    def test_stack_multiple_bug(self, future_stack):
+    def test_stack_multiple_bug(self, future_stack, using_infer_string):
         # bug when some uniques are not present in the data GH#3170
         id_col = ([1] * 3) + ([2] * 3)
         name = (["a"] * 3) + (["b"] * 3)
@@ -2125,6 +2125,8 @@ def test_stack_multiple_bug(self, future_stack):
         multi.columns.name = "Params"
         unst = multi.unstack("ID")
         msg = re.escape("agg function failed [how->mean,dtype->")
+        if using_infer_string:
+            msg = "str dtype does not support mean operations"
         with pytest.raises(TypeError, match=msg):
             unst.resample("W-THU").mean()
         down = unst.resample("W-THU").mean(numeric_only=True)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index c5158c56622a7..74582267a6475 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -428,7 +428,7 @@ def test_frame_multi_key_function_list():
     tm.assert_frame_equal(agged, expected)
 
 
-def test_frame_multi_key_function_list_partial_failure():
+def test_frame_multi_key_function_list_partial_failure(using_infer_string):
     data = DataFrame(
         {
             "A": [
@@ -479,6 +479,8 @@ def test_frame_multi_key_function_list_partial_failure():
     grouped = data.groupby(["A", "B"])
     funcs = ["mean", "std"]
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "str dtype does not support mean operations"
     with pytest.raises(TypeError, match=msg):
         grouped.agg(funcs)
 
@@ -665,9 +667,11 @@ def test_groupby_multi_corner(df):
     tm.assert_frame_equal(agged, expected)
 
 
-def test_raises_on_nuisance(df):
+def test_raises_on_nuisance(df, using_infer_string):
     grouped = df.groupby("A")
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "str dtype does not support mean operations"
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -743,9 +747,11 @@ def test_raise_on_nuisance_python_single(df):
         grouped.skew()
 
 
-def test_raise_on_nuisance_python_multiple(three_group):
+def test_raise_on_nuisance_python_multiple(three_group, using_infer_string):
     grouped = three_group.groupby(["A", "B"])
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "str dtype does not support mean operations"
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -783,12 +789,16 @@ def test_nonsense_func():
         df.groupby(lambda x: x + "foo")
 
 
-def test_wrap_aggregated_output_multindex(multiindex_dataframe_random_data):
+def test_wrap_aggregated_output_multindex(
+    multiindex_dataframe_random_data, using_infer_string
+):
     df = multiindex_dataframe_random_data.T
     df["baz", "two"] = "peekaboo"
 
     keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "str dtype does not support mean operations"
     with pytest.raises(TypeError, match=msg):
         df.groupby(keys).agg("mean")
     agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean")
diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py
index 0832b67b38098..a1f4627475bab 100644
--- a/pandas/tests/groupby/test_groupby_subclass.py
+++ b/pandas/tests/groupby/test_groupby_subclass.py
@@ -109,7 +109,7 @@ def test_groupby_resample_preserves_subclass(obj):
 
     df = obj(
         {
-            "Buyer": "Carl Carl Carl Carl Joe Carl".split(),
+            "Buyer": Series("Carl Carl Carl Carl Joe Carl".split(), dtype=object),
             "Quantity": [18, 3, 5, 1, 9, 3],
             "Date": [
                 datetime(2013, 9, 1, 13, 0),
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index 41e00f8121b14..8a09625dba79f 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -180,7 +180,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "category type does not support sum operations",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
-                    re.escape(f"agg function failed [how->{method},dtype->str]"),
+                    f"str dtype does not support {method} operations",
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -198,7 +198,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     f"Cannot perform {method} with non-ordered Categorical",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
-                    re.escape(f"agg function failed [how->{method},dtype->str]"),
+                    f"str dtype does not support {method} operations",
                 ]
             )
             with pytest.raises(exception, match=msg):
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 6925e0158fbea..acc9729d235b0 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -274,10 +274,13 @@ def test_groupby_raises_string_np(
 
     if using_infer_string:
         klass = TypeError
-        msg = (
-            "'.*StringArrayNumpySemantics' with dtype str does not "
-            f"support operation '{groupby_func_np.__name__}'"
-        )
+        if df["d"].dtype.storage == "python":
+            msg = "Cannot perform reduction 'mean' with string dtype"
+        else:
+            msg = (
+                "'ArrowStringArrayNumpySemantics' with dtype str does not "
+                f"support operation '{groupby_func_np.__name__}'"
+            )
 
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index a8fb1b392322d..18db05f554140 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -187,7 +187,7 @@ def test_api_compat_before_use(attr):
     getattr(rs, attr)
 
 
-def tests_raises_on_nuisance(test_frame):
+def tests_raises_on_nuisance(test_frame, using_infer_string):
     df = test_frame
     df["D"] = "foo"
     r = df.resample("h")
@@ -197,6 +197,8 @@ def tests_raises_on_nuisance(test_frame):
 
     expected = r[["A", "B", "C"]].mean()
     msg = re.escape("agg function failed [how->mean,dtype->")
+    if using_infer_string:
+        msg = "str dtype does not support mean operations"
     with pytest.raises(TypeError, match=msg):
         r.mean()
     result = r.mean(numeric_only=True)
@@ -881,7 +883,9 @@ def test_end_and_end_day_origin(
         ("sem", lib.no_default, "could not convert string to float"),
     ],
 )
-def test_frame_downsample_method(method, numeric_only, expected_data):
+def test_frame_downsample_method(
+    method, numeric_only, expected_data, using_infer_string
+):
     # GH#46442 test if `numeric_only` behave as expected for DataFrameGroupBy
 
     index = date_range("2018-01-01", periods=2, freq="D")
@@ -898,11 +902,21 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
         if method in ("var", "mean", "median", "prod"):
             klass = TypeError
             msg = re.escape(f"agg function failed [how->{method},dtype->")
+            if using_infer_string:
+                msg = f"str dtype does not support {method} operations"
+        elif method in ["sum", "std", "sem"] and using_infer_string:
+            klass = TypeError
+            msg = f"str dtype does not support {method} operations"
         else:
             klass = ValueError
             msg = expected_data
         with pytest.raises(klass, match=msg):
             _ = func(**kwargs)
+    elif method == "sum" and using_infer_string and numeric_only is not True:
+        klass = TypeError
+        msg = "str dtype does not support sum operations"
+        with pytest.raises(klass, match=msg):
+            _ = func(**kwargs)
     else:
         result = func(**kwargs)
         expected = DataFrame(expected_data, index=expected_index)
@@ -932,7 +946,9 @@ def test_frame_downsample_method(method, numeric_only, expected_data):
         ("last", lib.no_default, ["cat_2"]),
     ],
 )
-def test_series_downsample_method(method, numeric_only, expected_data):
+def test_series_downsample_method(
+    method, numeric_only, expected_data, using_infer_string
+):
     # GH#46442 test if `numeric_only` behave as expected for SeriesGroupBy
 
     index = date_range("2018-01-01", periods=2, freq="D")
@@ -948,8 +964,15 @@ def test_series_downsample_method(method, numeric_only, expected_data):
             func(**kwargs)
     elif method == "prod":
         msg = re.escape("agg function failed [how->prod,dtype->")
+        if using_infer_string:
+            msg = "str dtype does not support prod operations"
+        with pytest.raises(TypeError, match=msg):
+            func(**kwargs)
+    elif method == "sum" and using_infer_string and numeric_only is not True:
+        msg = "str dtype does not support sum operations"
         with pytest.raises(TypeError, match=msg):
             func(**kwargs)
+
     else:
         result = func(**kwargs)
         expected = Series(expected_data, index=expected_index)
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 0f743332acbbe..8e3da5b3bee38 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -620,7 +620,7 @@ def test_join_non_unique_period_index(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    def test_mixed_type_join_with_suffix(self):
+    def test_mixed_type_join_with_suffix(self, using_infer_string):
         # GH #916
         df = DataFrame(
             np.random.default_rng(2).standard_normal((20, 6)),
@@ -631,6 +631,8 @@ def test_mixed_type_join_with_suffix(self):
 
         grouped = df.groupby("id")
         msg = re.escape("agg function failed [how->mean,dtype->")
+        if using_infer_string:
+            msg = "str dtype does not support mean operations"
         with pytest.raises(TypeError, match=msg):
             grouped.mean()
         mn = grouped.mean(numeric_only=True)
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 44b96afaa4ef5..64ba8da907557 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -135,7 +135,7 @@ def test_pivot_table_categorical_observed_equal(self, observed):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_pivot_table_nocols(self):
+    def test_pivot_table_nocols(self, using_infer_string):
         df = DataFrame(
             {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
         )
@@ -935,12 +935,14 @@ def test_margins(self, data):
         for value_col in table.columns.levels[0]:
             self._check_output(table[value_col], value_col, data)
 
-    def test_no_col(self, data):
+    def test_no_col(self, data, using_infer_string):
         # no col
 
         # to help with a buglet
         data.columns = [k * 2 for k in data.columns]
         msg = re.escape("agg function failed [how->mean,dtype->")
+        if using_infer_string:
+            msg = "str dtype does not support mean operations"
         with pytest.raises(TypeError, match=msg):
             data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
         table = data.drop(columns="CC").pivot_table(
@@ -990,7 +992,7 @@ def test_no_col(self, data):
         ],
     )
     def test_margin_with_only_columns_defined(
-        self, columns, aggfunc, values, expected_columns
+        self, columns, aggfunc, values, expected_columns, using_infer_string
     ):
         # GH 31016
         df = DataFrame(
@@ -1014,6 +1016,8 @@ def test_margin_with_only_columns_defined(
         )
         if aggfunc != "sum":
             msg = re.escape("agg function failed [how->mean,dtype->")
+            if using_infer_string:
+                msg = "str dtype does not support mean operations"
             with pytest.raises(TypeError, match=msg):
                 df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
         if "B" not in columns:

From 4ca5a2f12e38413ba5ad08200f3308b2b57b5c94 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 19 Aug 2024 10:45:42 -0700
Subject: [PATCH 04/12] Fix failing test_in_numeric_groupby

---
 pandas/tests/extension/base/groupby.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
index bab8566a06dc2..c1480f54163e0 100644
--- a/pandas/tests/extension/base/groupby.py
+++ b/pandas/tests/extension/base/groupby.py
@@ -6,7 +6,6 @@
     is_bool_dtype,
     is_numeric_dtype,
     is_object_dtype,
-    is_string_dtype,
 )
 
 import pandas as pd
@@ -151,7 +150,6 @@ def test_in_numeric_groupby(self, data_for_grouping):
             is_numeric_dtype(dtype)
             or is_bool_dtype(dtype)
             or dtype.name == "decimal"
-            or is_string_dtype(dtype)
             or is_object_dtype(dtype)
             or dtype.kind == "m"  # in particular duration[*][pyarrow]
         ):

From 2c28a2c868fd3fb7ae980c2588a5cf5975e8ff2e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 22 Aug 2024 14:21:45 -0700
Subject: [PATCH 05/12] update exception messages

---
 asv_bench/benchmarks/groupby.py               |  2 +
 pandas/core/arrays/arrow/array.py             |  4 +-
 pandas/core/arrays/base.py                    |  2 +-
 pandas/core/groupby/groupby.py                |  2 +-
 pandas/tests/frame/test_stack_unstack.py      |  2 +-
 pandas/tests/generic/test_frame.py            |  1 +
 .../tests/groupby/aggregate/test_aggregate.py |  2 +-
 pandas/tests/groupby/methods/test_quantile.py | 11 ++--
 pandas/tests/groupby/test_groupby.py          | 63 ++++++++++++++-----
 pandas/tests/groupby/test_numeric_only.py     |  6 +-
 pandas/tests/groupby/test_raises.py           | 12 ++--
 pandas/tests/resample/test_resample_api.py    | 12 ++--
 pandas/tests/reshape/merge/test_join.py       |  2 +-
 pandas/tests/reshape/test_pivot.py            |  7 ++-
 14 files changed, 84 insertions(+), 44 deletions(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index abffa1f702b9c..19e95fefd4986 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -760,6 +760,8 @@ def setup(self, dtype, method):
         )
 
     def time_str_func(self, dtype, method):
+        if dtype == "string[python]" and method == "sum":
+            raise NotImplementedError
         self.df.groupby("a")[self.df.columns[1:]].agg(method)
 
 
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 27034ce63ff53..ef8192e80f9fe 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2282,7 +2282,9 @@ def _groupby_op(
                 "var",
                 "skew",
             ]:
-                raise TypeError(f"{self.dtype} dtype does not support {how} operations")
+                raise TypeError(
+                    f"dtype '{self.dtype}' does not support operation '{how}'"
+                )
             return super()._groupby_op(
                 how=how,
                 has_dropped_na=has_dropped_na,
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 6259de397b170..f47342c0c031a 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2548,7 +2548,7 @@ def _groupby_op(
                 "skew",
             ]:
                 raise TypeError(
-                    f"{self.dtype} dtype does not support {op.how} operations"
+                    f"dtype '{self.dtype}' does not support operation '{how}'"
                 )
             if op.how not in ["any", "all"]:
                 # Fail early to avoid conversion to object
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8f45e110594b1..007802131bcbe 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4289,7 +4289,7 @@ def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
                 )
             elif isinstance(vals.dtype, StringDtype):
                 raise TypeError(
-                    f"{vals.dtype} dtype does not support quantile operations"
+                    f"dtype '{vals.dtype}' does not support operation 'quantile'"
                 )
 
             inference: DtypeObj | None = None
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index c9ddb7bf60085..57c803c23b001 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -2126,7 +2126,7 @@ def test_stack_multiple_bug(self, future_stack, using_infer_string):
         unst = multi.unstack("ID")
         msg = re.escape("agg function failed [how->mean,dtype->")
         if using_infer_string:
-            msg = "str dtype does not support mean operations"
+            msg = "dtype 'str' does not support operation 'mean'"
         with pytest.raises(TypeError, match=msg):
             unst.resample("W-THU").mean()
         down = unst.resample("W-THU").mean(numeric_only=True)
diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py
index 1d0f491529b56..7b74856273ad3 100644
--- a/pandas/tests/generic/test_frame.py
+++ b/pandas/tests/generic/test_frame.py
@@ -61,6 +61,7 @@ def test_metadata_propagation_indiv_groupby(self):
                 "D": np.random.default_rng(2).standard_normal(8),
             }
         )
+        df = df.astype({"A": object, "B": object})
         result = df.groupby("A").sum()
         tm.assert_metadata_equivalent(df, result)
 
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 46c27849356b5..a14d4e8385576 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1020,7 +1020,7 @@ def test_groupby_as_index_agg(df):
 
     result2 = grouped.agg({"C": "mean", "D": "sum"})
     expected2 = grouped.mean(numeric_only=True)
-    expected2["D"] = grouped.sum()["D"]
+    expected2["D"] = grouped.sum(numeric_only=True)["D"]
     tm.assert_frame_equal(result2, expected2)
 
     grouped = df.groupby("A", as_index=True)
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 0e31c0698cb1e..e2a5ab04c8887 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -241,19 +241,20 @@ def test_groupby_quantile_nullable_array(values, q):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
+# @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
 @pytest.mark.parametrize("numeric_only", [True, False])
-def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
+def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only, using_infer_string):
     df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]})
     if numeric_only:
         result = df.groupby("a").quantile(q, numeric_only=numeric_only)
         expected = df.groupby("a")[["b"]].quantile(q)
         tm.assert_frame_equal(result, expected)
     else:
-        with pytest.raises(
-            TypeError, match="'quantile' cannot be performed against 'object' dtypes!"
-        ):
+        msg = "'quantile' cannot be performed against 'object' dtypes!"
+        if using_infer_string:
+            msg = "dtype 'str' does not support operation 'quantile'"
+        with pytest.raises(TypeError, match=msg):
             df.groupby("a").quantile(q, numeric_only=numeric_only)
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 74582267a6475..de4a225b92298 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -480,7 +480,7 @@ def test_frame_multi_key_function_list_partial_failure(using_infer_string):
     funcs = ["mean", "std"]
     msg = re.escape("agg function failed [how->mean,dtype->")
     if using_infer_string:
-        msg = "str dtype does not support mean operations"
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         grouped.agg(funcs)
 
@@ -578,6 +578,7 @@ def test_ops_not_as_index(reduction_func):
 
 
 def test_as_index_series_return_frame(df):
+    df = df.astype({"A": object, "B": object})
     grouped = df.groupby("A", as_index=False)
     grouped2 = df.groupby(["A", "B"], as_index=False)
 
@@ -671,7 +672,7 @@ def test_raises_on_nuisance(df, using_infer_string):
     grouped = df.groupby("A")
     msg = re.escape("agg function failed [how->mean,dtype->")
     if using_infer_string:
-        msg = "str dtype does not support mean operations"
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -717,7 +718,7 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
         # Added numeric_only as part of GH#46560; these do not drop nuisance
         # columns when numeric_only is False
         if using_infer_string:
-            msg = f"str dtype does not support {agg_function} operations"
+            msg = f"dtype 'str' does not support operation '{agg_function}'"
             klass = TypeError
         elif agg_function in ("std", "sem"):
             klass = ValueError
@@ -740,10 +741,16 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
         tm.assert_frame_equal(result, expected)
 
 
-def test_raise_on_nuisance_python_single(df):
+def test_raise_on_nuisance_python_single(df, using_infer_string):
     # GH 38815
     grouped = df.groupby("A")
-    with pytest.raises(ValueError, match="could not convert"):
+
+    err = ValueError
+    msg = "could not convert"
+    if using_infer_string:
+        err = TypeError
+        msg = "dtype 'str' does not support operation 'skew'"
+    with pytest.raises(err, match=msg):
         grouped.skew()
 
 
@@ -751,7 +758,7 @@ def test_raise_on_nuisance_python_multiple(three_group, using_infer_string):
     grouped = three_group.groupby(["A", "B"])
     msg = re.escape("agg function failed [how->mean,dtype->")
     if using_infer_string:
-        msg = "str dtype does not support mean operations"
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         grouped.agg("mean")
     with pytest.raises(TypeError, match=msg):
@@ -798,7 +805,7 @@ def test_wrap_aggregated_output_multindex(
     keys = [np.array([0, 0, 1]), np.array([0, 0, 1])]
     msg = re.escape("agg function failed [how->mean,dtype->")
     if using_infer_string:
-        msg = "str dtype does not support mean operations"
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         df.groupby(keys).agg("mean")
     agged = df.drop(columns=("baz", "two")).groupby(keys).agg("mean")
@@ -976,10 +983,20 @@ def test_groupby_with_hier_columns():
     tm.assert_index_equal(result.columns, df.columns[:-1])
 
 
-def test_grouping_ndarray(df):
+def test_grouping_ndarray(df, using_infer_string):
     grouped = df.groupby(df["A"].values)
+    grouped2 = df.groupby(df["A"].rename(None))
+
+    if using_infer_string:
+        msg = "dtype 'str' does not support operation 'sum'"
+        with pytest.raises(TypeError, match=msg):
+            grouped.sum()
+        with pytest.raises(TypeError, match=msg):
+            grouped2.sum()
+        return
+
     result = grouped.sum()
-    expected = df.groupby(df["A"].rename(None)).sum()
+    expected = grouped2.sum()
     tm.assert_frame_equal(result, expected)
 
 
@@ -1478,13 +1495,23 @@ def f(group):
     assert names == expected_names
 
 
-def test_no_dummy_key_names(df):
+def test_no_dummy_key_names(df, using_infer_string):
     # see gh-1291
-    result = df.groupby(df["A"].values).sum()
+    gb = df.groupby(df["A"].values)
+    gb2 = df.groupby([df["A"].values, df["B"].values])
+    if using_infer_string:
+        msg = "dtype 'str' does not support operation 'sum'"
+        with pytest.raises(TypeError, match=msg):
+            gb.sum()
+        with pytest.raises(TypeError, match=msg):
+            gb2.sum()
+        return
+
+    result = gb.sum()
     assert result.index.name is None
 
-    result = df.groupby([df["A"].values, df["B"].values]).sum()
-    assert result.index.names == (None, None)
+    result2 = gb2.sum()
+    assert result2.index.names == (None, None)
 
 
 def test_groupby_sort_multiindex_series():
@@ -1820,7 +1847,7 @@ def get_categorical_invalid_expected():
             elif is_per:
                 msg = "Period type does not support"
             elif is_str:
-                msg = "str dtype does not support"
+                msg = f"dtype 'str' does not support operation '{op}'"
             else:
                 msg = "category type does not support"
             if op == "skew":
@@ -2750,7 +2777,7 @@ def test_obj_with_exclusions_duplicate_columns():
 def test_groupby_numeric_only_std_no_result(numeric_only):
     # GH 51080
     dicts_non_numeric = [{"a": "foo", "b": "bar"}, {"a": "car", "b": "dar"}]
-    df = DataFrame(dicts_non_numeric)
+    df = DataFrame(dicts_non_numeric, dtype=object)
     dfgb = df.groupby("a", as_index=False, sort=False)
 
     if numeric_only:
@@ -2809,10 +2836,14 @@ def test_grouping_with_categorical_interval_columns():
 def test_groupby_sum_on_nan_should_return_nan(bug_var):
     # GH 24196
     df = DataFrame({"A": [bug_var, bug_var, bug_var, np.nan]})
+    if isinstance(bug_var, str):
+        df = df.astype(object)
     dfgb = df.groupby(lambda x: x)
     result = dfgb.sum(min_count=1)
 
-    expected_df = DataFrame([bug_var, bug_var, bug_var, None], columns=["A"])
+    expected_df = DataFrame(
+        [bug_var, bug_var, bug_var, None], columns=["A"], dtype=df["A"].dtype
+    )
     tm.assert_frame_equal(result, expected_df)
 
 
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index 8a09625dba79f..3e5c7daf933df 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -28,7 +28,7 @@ def df(self):
                 "group": [1, 1, 2],
                 "int": [1, 2, 3],
                 "float": [4.0, 5.0, 6.0],
-                "string": list("abc"),
+                "string": Series(["a", "b", "c"], dtype=object),
                 "category_string": Series(list("abc")).astype("category"),
                 "category_int": [7, 8, 9],
                 "datetime": date_range("20130101", periods=3),
@@ -180,7 +180,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     "category type does not support sum operations",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
-                    f"str dtype does not support {method} operations",
+                    f"dtype 'str' does not support operation '{method}'",
                 ]
             )
             with pytest.raises(exception, match=msg):
@@ -198,7 +198,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     f"Cannot perform {method} with non-ordered Categorical",
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     re.escape(f"agg function failed [how->{method},dtype->string]"),
-                    f"str dtype does not support {method} operations",
+                    f"dtype 'str' does not support operation '{method}'",
                 ]
             )
             with pytest.raises(exception, match=msg):
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index acc9729d235b0..e2ca6b08a8478 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -194,7 +194,7 @@ def test_groupby_raises_string(
             "skew",
             "quantile",
         ]:
-            msg = f"str dtype does not support {groupby_func} operations"
+            msg = f"dtype 'str' does not support operation '{groupby_func}'"
             if groupby_func == "sum":
                 # The object-dtype allows this, StringArray variants do not.
                 klass = TypeError
@@ -213,14 +213,13 @@ def test_groupby_raises_string(
             #  there.
             import pyarrow as pa
 
+            # TODO(infer_string): avoid bubbling up pyarrow exceptions
             klass = pa.lib.ArrowNotImplementedError
             msg = "Function 'subtract_checked' has no kernel matching input types"
         elif groupby_func in ["cummin", "cummax"]:
             msg = msg.replace("object", "str")
         elif groupby_func == "corrwith":
-            msg = (
-                "'.*NumpySemantics' with dtype str does " "not support operation 'mean'"
-            )
+            msg = "'.*NumpySemantics' with dtype str does not support operation 'mean'"
 
     if groupby_func == "fillna":
         kind = "Series" if groupby_series else "DataFrame"
@@ -275,7 +274,10 @@ def test_groupby_raises_string_np(
     if using_infer_string:
         klass = TypeError
         if df["d"].dtype.storage == "python":
-            msg = "Cannot perform reduction 'mean' with string dtype"
+            msg = (
+                f"Cannot perform reduction '{groupby_func_np.__name__}' "
+                "with string dtype"
+            )
         else:
             msg = (
                 "'ArrowStringArrayNumpySemantics' with dtype str does not "
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 18db05f554140..940c9e6700ea2 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -198,7 +198,7 @@ def tests_raises_on_nuisance(test_frame, using_infer_string):
     expected = r[["A", "B", "C"]].mean()
     msg = re.escape("agg function failed [how->mean,dtype->")
     if using_infer_string:
-        msg = "str dtype does not support mean operations"
+        msg = "dtype 'str' does not support operation 'mean'"
     with pytest.raises(TypeError, match=msg):
         r.mean()
     result = r.mean(numeric_only=True)
@@ -903,10 +903,10 @@ def test_frame_downsample_method(
             klass = TypeError
             msg = re.escape(f"agg function failed [how->{method},dtype->")
             if using_infer_string:
-                msg = f"str dtype does not support {method} operations"
+                msg = f"dtype 'str' does not support operation '{method}'"
         elif method in ["sum", "std", "sem"] and using_infer_string:
             klass = TypeError
-            msg = f"str dtype does not support {method} operations"
+            msg = f"dtype 'str' does not support operation '{method}'"
         else:
             klass = ValueError
             msg = expected_data
@@ -914,7 +914,7 @@ def test_frame_downsample_method(
             _ = func(**kwargs)
     elif method == "sum" and using_infer_string and numeric_only is not True:
         klass = TypeError
-        msg = "str dtype does not support sum operations"
+        msg = f"dtype 'str' does not support operation '{method}'"
         with pytest.raises(klass, match=msg):
             _ = func(**kwargs)
     else:
@@ -965,11 +965,11 @@ def test_series_downsample_method(
     elif method == "prod":
         msg = re.escape("agg function failed [how->prod,dtype->")
         if using_infer_string:
-            msg = "str dtype does not support prod operations"
+            msg = "dtype 'str' does not support operation 'prod'"
         with pytest.raises(TypeError, match=msg):
             func(**kwargs)
     elif method == "sum" and using_infer_string and numeric_only is not True:
-        msg = "str dtype does not support sum operations"
+        msg = "dtype 'str' does not support operation 'sum'"
         with pytest.raises(TypeError, match=msg):
             func(**kwargs)
 
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 8e3da5b3bee38..65bfea0b9beea 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -632,7 +632,7 @@ def test_mixed_type_join_with_suffix(self, using_infer_string):
         grouped = df.groupby("id")
         msg = re.escape("agg function failed [how->mean,dtype->")
         if using_infer_string:
-            msg = "str dtype does not support mean operations"
+            msg = "dtype 'str' does not support operation 'mean'"
         with pytest.raises(TypeError, match=msg):
             grouped.mean()
         mn = grouped.mean(numeric_only=True)
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 64ba8da907557..9f26a31f1bdc1 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -135,10 +135,11 @@ def test_pivot_table_categorical_observed_equal(self, observed):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_pivot_table_nocols(self, using_infer_string):
+    def test_pivot_table_nocols(self):
         df = DataFrame(
             {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
         )
+        df = df.astype({"rows": object, "cols": object})
         rs = df.pivot_table(columns="cols", aggfunc="sum")
         xp = df.pivot_table(index="cols", aggfunc="sum").T
         tm.assert_frame_equal(rs, xp)
@@ -942,7 +943,7 @@ def test_no_col(self, data, using_infer_string):
         data.columns = [k * 2 for k in data.columns]
         msg = re.escape("agg function failed [how->mean,dtype->")
         if using_infer_string:
-            msg = "str dtype does not support mean operations"
+            msg = "dtype 'str' does not support operation 'mean'"
         with pytest.raises(TypeError, match=msg):
             data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean")
         table = data.drop(columns="CC").pivot_table(
@@ -1017,7 +1018,7 @@ def test_margin_with_only_columns_defined(
         if aggfunc != "sum":
             msg = re.escape("agg function failed [how->mean,dtype->")
             if using_infer_string:
-                msg = "str dtype does not support mean operations"
+                msg = "dtype 'str' does not support operation 'mean'"
             with pytest.raises(TypeError, match=msg):
                 df.pivot_table(columns=columns, margins=True, aggfunc=aggfunc)
         if "B" not in columns:

From 708e5d3220b9f16947e6e430e75a3cf0058b769e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 22 Aug 2024 15:48:52 -0700
Subject: [PATCH 06/12] TST: update expected corrwith message per string storage

---
 pandas/tests/groupby/test_raises.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index e2ca6b08a8478..d4c135a062052 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -219,7 +219,13 @@ def test_groupby_raises_string(
         elif groupby_func in ["cummin", "cummax"]:
             msg = msg.replace("object", "str")
         elif groupby_func == "corrwith":
-            msg = "'.*NumpySemantics' with dtype str does not support operation 'mean'"
+            if df["d"].dtype.storage == "pyarrow":
+                msg = (
+                    "ArrowStringArrayNumpySemantics' with dtype str does not "
+                    "support operation 'mean'"
+                )
+            else:
+                msg = "Cannot perform reduction 'mean' with string dtype"
 
     if groupby_func == "fillna":
         kind = "Series" if groupby_series else "DataFrame"

From 75eddea1673ddff9bbf848bbedae091186e567c5 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 27 Aug 2024 07:56:17 -0700
Subject: [PATCH 07/12] skip no-longer-supported string[python] sum benchmark in setup

---
 asv_bench/benchmarks/groupby.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 19e95fefd4986..352615ca54cba 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -752,6 +752,9 @@ class String:
     ]
 
     def setup(self, dtype, method):
+        if dtype == "string[python]" and method == "sum":
+            raise NotImplementedError  # skip benchmark
+
         cols = list("abcdefghjkl")
         self.df = DataFrame(
             np.random.randint(0, 100, size=(10_000, len(cols))),
@@ -760,8 +763,6 @@ def setup(self, dtype, method):
         )
 
     def time_str_func(self, dtype, method):
-        if dtype == "string[python]" and method == "sum":
-            raise NotImplementedError
         self.df.groupby("a")[self.df.columns[1:]].agg(method)
 
 

From 72c59cf7b64526ce69342f095dc7bfd7bdef249e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 28 Aug 2024 07:53:10 -0700
Subject: [PATCH 08/12] update exception messages

---
 pandas/core/groupby/groupby.py                |  6 +----
 pandas/tests/groupby/methods/test_quantile.py |  5 +----
 pandas/tests/groupby/test_groupby.py          | 22 ++++---------------
 pandas/tests/groupby/test_numeric_only.py     |  6 +++--
 pandas/tests/groupby/test_raises.py           | 10 ++-------
 5 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 007802131bcbe..efd4f1112aa09 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4283,11 +4283,7 @@ def quantile(
         starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups)
 
         def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, DtypeObj | None]:
-            if is_object_dtype(vals.dtype):
-                raise TypeError(
-                    "'quantile' cannot be performed against 'object' dtypes!"
-                )
-            elif isinstance(vals.dtype, StringDtype):
+            if isinstance(vals.dtype, StringDtype) or is_object_dtype(vals.dtype):
                 raise TypeError(
                     f"dtype '{vals.dtype}' does not support operation 'quantile'"
                 )
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index e2a5ab04c8887..3f22851426282 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -241,7 +241,6 @@ def test_groupby_quantile_nullable_array(values, q):
     tm.assert_series_equal(result, expected)
 
 
-# @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
 @pytest.mark.parametrize("numeric_only", [True, False])
 def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only, using_infer_string):
@@ -251,9 +250,7 @@ def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only, using_infer_s
         expected = df.groupby("a")[["b"]].quantile(q)
         tm.assert_frame_equal(result, expected)
     else:
-        msg = "'quantile' cannot be performed against 'object' dtypes!"
-        if using_infer_string:
-            msg = "dtype 'str' does not support operation 'quantile'"
+        msg = "dtype '.*' does not support operation 'quantile'"
         with pytest.raises(TypeError, match=msg):
             df.groupby("a").quantile(q, numeric_only=numeric_only)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index de4a225b92298..17eb30cb76f3b 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -983,18 +983,11 @@ def test_groupby_with_hier_columns():
     tm.assert_index_equal(result.columns, df.columns[:-1])
 
 
-def test_grouping_ndarray(df, using_infer_string):
+def test_grouping_ndarray(df):
+    df = df.astype({"A": object, "B": object})
     grouped = df.groupby(df["A"].values)
     grouped2 = df.groupby(df["A"].rename(None))
 
-    if using_infer_string:
-        msg = "dtype 'str' does not support operation 'sum'"
-        with pytest.raises(TypeError, match=msg):
-            grouped.sum()
-        with pytest.raises(TypeError, match=msg):
-            grouped2.sum()
-        return
-
     result = grouped.sum()
     expected = grouped2.sum()
     tm.assert_frame_equal(result, expected)
@@ -1495,18 +1488,11 @@ def f(group):
     assert names == expected_names
 
 
-def test_no_dummy_key_names(df, using_infer_string):
+def test_no_dummy_key_names(df):
     # see gh-1291
+    df = df.astype({"A": object, "B": object})
     gb = df.groupby(df["A"].values)
     gb2 = df.groupby([df["A"].values, df["B"].values])
-    if using_infer_string:
-        msg = "dtype 'str' does not support operation 'sum'"
-        with pytest.raises(TypeError, match=msg):
-            gb.sum()
-        with pytest.raises(TypeError, match=msg):
-            gb2.sum()
-        return
-
     result = gb.sum()
     assert result.index.name is None
 
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index 3e5c7daf933df..4d4800ca1b051 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -299,7 +299,9 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
                 re.escape(f"agg function failed [how->{kernel},dtype->object]"),
             ]
         )
-        if kernel == "idxmin":
+        if kernel == "quantile":
+            msg = "dtype 'object' does not support operation 'quantile'"
+        elif kernel == "idxmin":
             msg = "'<' not supported between instances of 'type' and 'type'"
         elif kernel == "idxmax":
             msg = "'>' not supported between instances of 'type' and 'type'"
@@ -379,7 +381,7 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
     # that succeed should not be allowed to fail (without deprecation, at least)
     if groupby_func in fails_on_numeric_object and dtype is object:
         if groupby_func == "quantile":
-            msg = "cannot be performed against 'object' dtypes"
+            msg = "dtype 'object' does not support operation 'quantile'"
         else:
             msg = "is not supported for object dtype"
         with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index d4c135a062052..629e8466a99b3 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -204,18 +204,12 @@ def test_groupby_raises_string(
         elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow":
             # This doesn't go through EA._groupby_op so the message isn't controlled
             #  there.
-            import pyarrow as pa
-
-            klass = pa.lib.ArrowNotImplementedError
-            msg = "Function 'divide' has no kernel matching input types"
+            msg = "operation 'truediv' not supported for dtype 'str' with dtype 'str'"
         elif groupby_func == "diff" and df["d"].dtype.storage == "pyarrow":
             # This doesn't go through EA._groupby_op so the message isn't controlled
             #  there.
-            import pyarrow as pa
+            msg = "operation 'sub' not supported for dtype 'str' with dtype 'str'"
 
-            # TODO(infer_string): avoid bubbling up pyarrow exceptions
-            klass = pa.lib.ArrowNotImplementedError
-            msg = "Function 'subtract_checked' has no kernel matching input types"
         elif groupby_func in ["cummin", "cummax"]:
             msg = msg.replace("object", "str")
         elif groupby_func == "corrwith":

From 10be506223add53686b6de5c3e7d7a2ad12c76b8 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 28 Aug 2024 08:26:21 -0700
Subject: [PATCH 09/12] update expected quantile exception message in test_raises

---
 pandas/tests/groupby/test_raises.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 629e8466a99b3..c7ff64f5bcd5a 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -166,7 +166,7 @@ def test_groupby_raises_string(
             TypeError,
             re.escape("agg function failed [how->prod,dtype->object]"),
         ),
-        "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
+        "quantile": (TypeError, "dtype 'object' does not support operation 'quantile'"),
         "rank": (None, ""),
         "sem": (ValueError, "could not convert string to float"),
         "shift": (None, ""),

From c8ebe0793543491bd55994d438f08917798e7f2b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 28 Aug 2024 08:26:55 -0700
Subject: [PATCH 10/12] update expected quantile exception message in test_quantile_raises

---
 pandas/tests/groupby/methods/test_quantile.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 3f22851426282..49c4b4ac3c474 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -162,7 +162,8 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby,
 def test_quantile_raises():
     df = DataFrame([["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"])
 
-    with pytest.raises(TypeError, match="cannot be performed against 'object' dtypes"):
+    msg = "dtype 'object' does not support operation 'quantile'"
+    with pytest.raises(TypeError, match=msg):
         df.groupby("key").quantile()
 
 

From 08713262105e36841e692b0d7cba244ac4dcf1bd Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 1 Nov 2024 11:19:18 -0700
Subject: [PATCH 11/12] Update now that .sum() is supported

---
 pandas/core/arrays/arrow/array.py             |  1 -
 pandas/core/arrays/base.py                    |  1 -
 pandas/tests/extension/base/groupby.py        |  2 ++
 pandas/tests/groupby/aggregate/test_cython.py |  4 +--
 pandas/tests/groupby/test_groupby.py          |  4 +--
 pandas/tests/groupby/test_raises.py           | 26 +++++--------------
 pandas/tests/resample/test_resample_api.py    |  9 -------
 7 files changed, 12 insertions(+), 35 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index e08b78b98e642..7c42bb5a727ba 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2304,7 +2304,6 @@ def _groupby_op(
     ):
         if isinstance(self.dtype, StringDtype):
             if how in [
-                "sum",
                 "prod",
                 "mean",
                 "median",
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 6cf9cca341794..4835d808f2433 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2609,7 +2609,6 @@ def _groupby_op(
         if isinstance(self.dtype, StringDtype):
             # StringArray
             if op.how in [
-                "sum",
                 "prod",
                 "mean",
                 "median",
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
index c1480f54163e0..bab8566a06dc2 100644
--- a/pandas/tests/extension/base/groupby.py
+++ b/pandas/tests/extension/base/groupby.py
@@ -6,6 +6,7 @@
     is_bool_dtype,
     is_numeric_dtype,
     is_object_dtype,
+    is_string_dtype,
 )
 
 import pandas as pd
@@ -150,6 +151,7 @@ def test_in_numeric_groupby(self, data_for_grouping):
             is_numeric_dtype(dtype)
             or is_bool_dtype(dtype)
             or dtype.name == "decimal"
+            or is_string_dtype(dtype)
             or is_object_dtype(dtype)
             or dtype.kind == "m"  # in particular duration[*][pyarrow]
         ):
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index d28eb227314c7..b937e7dcc8136 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -148,11 +148,11 @@ def test_cython_agg_return_dict():
 
 def test_cython_fail_agg():
     dr = bdate_range("1/1/2000", periods=50)
-    ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr)
+    ts = Series(["A", "B", "C", "D", "E"] * 10, dtype=object, index=dr)
 
     grouped = ts.groupby(lambda x: x.month)
     summed = grouped.sum()
-    expected = grouped.agg(np.sum)
+    expected = grouped.agg(np.sum).astype(object)
     tm.assert_series_equal(summed, expected)
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index a434391983c01..ac7f305880878 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -709,8 +709,6 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only, using_infer_string):
     grouped = df.groupby("A")
 
     no_drop_nuisance = ("var", "std", "sem", "mean", "prod", "median")
-    if using_infer_string:
-        no_drop_nuisance += ("sum",)
     if agg_function in no_drop_nuisance and not numeric_only:
         # Added numeric_only as part of GH#46560; these do not drop nuisance
         # columns when numeric_only is False
@@ -1814,7 +1812,7 @@ def get_categorical_invalid_expected():
 
     if op in ["prod", "sum", "skew"]:
         # ops that require more than just ordered-ness
-        if is_dt64 or is_cat or is_per or is_str:
+        if is_dt64 or is_cat or is_per or (is_str and op != "sum"):
             # GH#41291
             # datetime64 -> prod and sum are invalid
             if is_dt64:
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index f3768ee3433b5..e915011875c60 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -182,7 +182,6 @@ def test_groupby_raises_string(
 
     if using_infer_string:
         if groupby_func in [
-            "sum",
             "prod",
             "mean",
             "median",
@@ -213,13 +212,7 @@ def test_groupby_raises_string(
         elif groupby_func in ["cummin", "cummax"]:
             msg = msg.replace("object", "str")
         elif groupby_func == "corrwith":
-            if df["d"].dtype.storage == "pyarrow":
-                msg = (
-                    "ArrowStringArrayNumpySemantics' with dtype str does not "
-                    "support operation 'mean'"
-                )
-            else:
-                msg = "Cannot perform reduction 'mean' with string dtype"
+            msg = "Cannot perform reduction 'mean' with string dtype"
 
     if groupby_func == "fillna":
         kind = "Series" if groupby_series else "DataFrame"
@@ -273,17 +266,12 @@ def test_groupby_raises_string_np(
     }[groupby_func_np]
 
     if using_infer_string:
-        klass = TypeError
-        if df["d"].dtype.storage == "python":
-            msg = (
-                f"Cannot perform reduction '{groupby_func_np.__name__}' "
-                "with string dtype"
-            )
-        else:
-            msg = (
-                "'ArrowStringArrayNumpySemantics' with dtype str does not "
-                f"support operation '{groupby_func_np.__name__}'"
-            )
+        if groupby_func_np is np.mean:
+            klass = TypeError
+        msg = (
+            f"Cannot perform reduction '{groupby_func_np.__name__}' "
+            "with string dtype"
+        )
 
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 940c9e6700ea2..b7b80b5e427ff 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -912,11 +912,6 @@ def test_frame_downsample_method(
             msg = expected_data
         with pytest.raises(klass, match=msg):
             _ = func(**kwargs)
-    elif method == "sum" and using_infer_string and numeric_only is not True:
-        klass = TypeError
-        msg = f"dtype 'str' does not support operation '{method}'"
-        with pytest.raises(klass, match=msg):
-            _ = func(**kwargs)
     else:
         result = func(**kwargs)
         expected = DataFrame(expected_data, index=expected_index)
@@ -968,10 +963,6 @@ def test_series_downsample_method(
             msg = "dtype 'str' does not support operation 'prod'"
         with pytest.raises(TypeError, match=msg):
             func(**kwargs)
-    elif method == "sum" and using_infer_string and numeric_only is not True:
-        msg = "dtype 'str' does not support operation 'sum'"
-        with pytest.raises(TypeError, match=msg):
-            func(**kwargs)
 
     else:
         result = func(**kwargs)

From baa1dd95ef140deb9ec87d7fdf523c46ac241ace Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 4 Nov 2024 09:28:29 +0100
Subject: [PATCH 12/12] more cleanups now sum is implemented

---
 asv_bench/benchmarks/groupby.py                  |  3 ---
 pandas/tests/generic/test_frame.py               |  1 -
 pandas/tests/groupby/aggregate/test_aggregate.py |  2 +-
 pandas/tests/groupby/methods/test_quantile.py    |  2 +-
 pandas/tests/groupby/test_groupby.py             |  9 ++-------
 pandas/tests/groupby/test_numeric_only.py        | 10 ++++++++--
 pandas/tests/groupby/test_raises.py              |  5 +----
 pandas/tests/reshape/test_pivot.py               |  1 -
 8 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 352615ca54cba..abffa1f702b9c 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -752,9 +752,6 @@ class String:
     ]
 
     def setup(self, dtype, method):
-        if dtype == "string[python]" and method == "sum":
-            raise NotImplementedError  # skip benchmark
-
         cols = list("abcdefghjkl")
         self.df = DataFrame(
             np.random.randint(0, 100, size=(10_000, len(cols))),
diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py
index 7b74856273ad3..1d0f491529b56 100644
--- a/pandas/tests/generic/test_frame.py
+++ b/pandas/tests/generic/test_frame.py
@@ -61,7 +61,6 @@ def test_metadata_propagation_indiv_groupby(self):
                 "D": np.random.default_rng(2).standard_normal(8),
             }
         )
-        df = df.astype({"A": object, "B": object})
         result = df.groupby("A").sum()
         tm.assert_metadata_equivalent(df, result)
 
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index a14d4e8385576..46c27849356b5 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1020,7 +1020,7 @@ def test_groupby_as_index_agg(df):
 
     result2 = grouped.agg({"C": "mean", "D": "sum"})
     expected2 = grouped.mean(numeric_only=True)
-    expected2["D"] = grouped.sum(numeric_only=True)["D"]
+    expected2["D"] = grouped.sum()["D"]
     tm.assert_frame_equal(result2, expected2)
 
     grouped = df.groupby("A", as_index=True)
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 49c4b4ac3c474..4a8ad65200caa 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -244,7 +244,7 @@ def test_groupby_quantile_nullable_array(values, q):
 
 @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]])
 @pytest.mark.parametrize("numeric_only", [True, False])
-def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only, using_infer_string):
+def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
     df = DataFrame({"a": [1], "b": [2.0], "c": ["x"]})
     if numeric_only:
         result = df.groupby("a").quantile(q, numeric_only=numeric_only)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index ac7f305880878..3305b48a4dcdc 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -575,7 +575,6 @@ def test_ops_not_as_index(reduction_func):
 
 
 def test_as_index_series_return_frame(df):
-    df = df.astype({"A": object, "B": object})
     grouped = df.groupby("A", as_index=False)
     grouped2 = df.groupby(["A", "B"], as_index=False)
 
@@ -979,7 +978,6 @@ def test_groupby_with_hier_columns():
 
 
 def test_grouping_ndarray(df):
-    df = df.astype({"A": object, "B": object})
     grouped = df.groupby(df["A"].values)
     grouped2 = df.groupby(df["A"].rename(None))
 
@@ -1477,13 +1475,10 @@ def f(group):
 
 def test_no_dummy_key_names(df):
     # see gh-1291
-    df = df.astype({"A": object, "B": object})
-    gb = df.groupby(df["A"].values)
-    gb2 = df.groupby([df["A"].values, df["B"].values])
-    result = gb.sum()
+    result = df.groupby(df["A"].values).sum()
     assert result.index.name is None
 
-    result2 = gb2.sum()
+    result2 = df.groupby([df["A"].values, df["B"].values]).sum()
     assert result2.index.names == (None, None)
 
 
diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py
index 4d4800ca1b051..cb4569812f600 100644
--- a/pandas/tests/groupby/test_numeric_only.py
+++ b/pandas/tests/groupby/test_numeric_only.py
@@ -28,7 +28,8 @@ def df(self):
                 "group": [1, 1, 2],
                 "int": [1, 2, 3],
                 "float": [4.0, 5.0, 6.0],
-                "string": Series(["a", "b", "c"], dtype=object),
+                "string": Series(["a", "b", "c"], dtype="str"),
+                "object": Series(["a", "b", "c"], dtype=object),
                 "category_string": Series(list("abc")).astype("category"),
                 "category_int": [7, 8, 9],
                 "datetime": date_range("20130101", periods=3),
@@ -40,6 +41,7 @@ def df(self):
                 "int",
                 "float",
                 "string",
+                "object",
                 "category_string",
                 "category_int",
                 "datetime",
@@ -112,6 +114,7 @@ def test_first_last(self, df, method):
                 "int",
                 "float",
                 "string",
+                "object",
                 "category_string",
                 "category_int",
                 "datetime",
@@ -159,7 +162,9 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
 
         # object dtypes for transformations are not implemented in Cython and
         # have no Python fallback
-        exception = NotImplementedError if method.startswith("cum") else TypeError
+        exception = (
+            (NotImplementedError, TypeError) if method.startswith("cum") else TypeError
+        )
 
         if method in ("min", "max", "cummin", "cummax", "cumsum", "cumprod"):
             # The methods default to numeric_only=False and raise TypeError
@@ -170,6 +175,7 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
                     re.escape(f"agg function failed [how->{method},dtype->object]"),
                     # cumsum/cummin/cummax/cumprod
                     "function is not implemented for this dtype",
+                    f"dtype 'str' does not support operation '{method}'",
                 ]
             )
             with pytest.raises(exception, match=msg):
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index e915011875c60..1e0a15d0ba796 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -194,10 +194,7 @@ def test_groupby_raises_string(
             "quantile",
         ]:
             msg = f"dtype 'str' does not support operation '{groupby_func}'"
-            if groupby_func == "sum":
-                # The object-dtype allows this, StringArray variants do not.
-                klass = TypeError
-            elif groupby_func in ["sem", "std", "skew"]:
+            if groupby_func in ["sem", "std", "skew"]:
                 # The object-dtype raises ValueError when trying to convert to numeric.
                 klass = TypeError
         elif groupby_func == "pct_change" and df["d"].dtype.storage == "pyarrow":
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 72dc1be251064..d8a9acdc561fd 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -139,7 +139,6 @@ def test_pivot_table_nocols(self):
         df = DataFrame(
             {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]}
         )
-        df = df.astype({"rows": object, "cols": object})
         rs = df.pivot_table(columns="cols", aggfunc="sum")
         xp = df.pivot_table(index="cols", aggfunc="sum").T
         tm.assert_frame_equal(rs, xp)