From 9528ffc7d0bacf449de2a0c18dc82a9a840f9695 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 3 Feb 2023 17:51:22 -0800
Subject: [PATCH 1/7] REF: move groupby reduction methods to EA

---
 pandas/core/arrays/base.py        |  53 +++++++++
 pandas/core/arrays/categorical.py |  24 ++++
 pandas/core/arrays/masked.py      |  31 +++++
 pandas/core/groupby/ops.py        | 188 +++---------------------------
 4 files changed, 125 insertions(+), 171 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index c261a41e1e77e..29d3127fee492 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1688,6 +1688,59 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
 
         return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
 
+    # ------------------------------------------------------------------------
+    # GroupBy Methods
+
+    def groupby_op(
+        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+    ):
+        from pandas.core.arrays import (
+            DatetimeArray,
+            PeriodArray,
+            TimedeltaArray,
+        )
+        from pandas.core.arrays.string_ import StringDtype
+
+        # GH#43682
+        if isinstance(self, (DatetimeArray, PeriodArray, TimedeltaArray)):
+            # All of the functions implemented here are ordinal, so we can
+            #  operate on the tz-naive equivalents
+            npvalues = self._ndarray.view("M8[ns]")
+        elif isinstance(self.dtype, StringDtype):
+            # StringArray
+            npvalues = self.to_numpy(object, na_value=np.nan)
+        else:
+            raise NotImplementedError(
+                f"function is not implemented for this dtype: {self.dtype}"
+            )
+
+        res_values = op._cython_op_ndim_compat(
+            npvalues,
+            min_count=min_count,
+            ngroups=ngroups,
+            comp_ids=ids,
+            mask=None,
+            **kwargs,
+        )
+
+        if op.how in op.cast_blocklist:
+            # i.e. how in ["rank"], since other cast_blocklist methods don't go
+            #  through cython_operation
+            return res_values
+
+        if isinstance(self.dtype, StringDtype):
+            dtype = self.dtype
+            string_array_cls = dtype.construct_array_type()
+            return string_array_cls._from_sequence(res_values, dtype=dtype)
+
+        elif isinstance(self, (DatetimeArray, TimedeltaArray, PeriodArray)):
+            # In to_cython_values we took a view as M8[ns]
+            assert res_values.dtype == "M8[ns]"
+            res_values = res_values.view(self._ndarray.dtype)
+            return self._from_backing_data(res_values)
+        else:
+            raise NotImplementedError
+
 
 class ExtensionArraySupportsAnyAll(ExtensionArray):
     def any(self, *, skipna: bool = True) -> bool:
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index f816b30b825b7..ceae5c385967b 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2318,6 +2318,30 @@ def _str_get_dummies(self, sep: str = "|"):
 
         return PandasArray(self.astype(str))._str_get_dummies(sep)
 
+    # ------------------------------------------------------------------------
+    # GroupBy Methods
+
+    def groupby_op(
+        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+    ):
+        assert op.how == "rank"  # the only one implemented ATM
+        assert self.ordered  # checked earlier
+        mask = self.isna()
+        npvalues = self._ndarray
+
+        res_values = op._cython_op_ndim_compat(
+            npvalues,
+            min_count=min_count,
+            ngroups=ngroups,
+            comp_ids=ids,
+            mask=mask,
+            **kwargs,
+        )
+
+        # If we ever have more than just "rank" here, we'll need to do
+        #  `if op.how in op.cast_blocklist` like we do for other dtypes.
+        return res_values
+
 
 # The Series.cat accessor
 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 8324d4b2618f1..bf12a9ac01a5a 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -1383,3 +1383,34 @@ def _accumulate(
         data, mask = op(data, mask, skipna=skipna, **kwargs)
 
         return type(self)(data, mask, copy=False)
+
+    # ------------------------------------------------------------------
+    # GroupBy Methods
+
+    def groupby_op(
+        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+    ):
+        # libgroupby functions are responsible for NOT altering mask
+        mask = self._mask
+        if op.kind != "aggregate":
+            result_mask = mask.copy()
+        else:
+            result_mask = np.zeros(ngroups, dtype=bool)
+
+        res_values = op._cython_op_ndim_compat(
+            self._data,
+            min_count=min_count,
+            ngroups=ngroups,
+            comp_ids=ids,
+            mask=mask,
+            result_mask=result_mask,
+            **kwargs,
+        )
+
+        if op.how == "ohlc":
+            arity = op._cython_arity.get(op.how, 1)
+            result_mask = np.tile(result_mask, (arity, 1)).T
+
+        # res_values should already have the correct dtype, we just need to
+        #  wrap in a MaskedArray
+        return self._maybe_mask_result(res_values, result_mask)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index bff61ec135d74..6d959bd7a3a79 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -65,18 +65,6 @@
     maybe_fill,
 )
 
-from pandas.core.arrays import (
-    Categorical,
-    DatetimeArray,
-    ExtensionArray,
-    PeriodArray,
-    TimedeltaArray,
-)
-from pandas.core.arrays.masked import (
-    BaseMaskedArray,
-    BaseMaskedDtype,
-)
-from pandas.core.arrays.string_ import StringDtype
 from pandas.core.frame import DataFrame
 from pandas.core.groupby import grouper
 from pandas.core.indexes.api import (
@@ -220,7 +208,7 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
         return values
 
     # TODO: general case implementation overridable by EAs.
-    def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
+    def _disallow_invalid_ops(self, dtype: DtypeObj):
         """
         Check if we can do this operation with our cython functions.
 
@@ -233,7 +221,7 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
         """
         how = self.how
 
-        if is_numeric:
+        if is_numeric_dtype(dtype):
             # never an invalid op for those dtypes, so return early as fastpath
             return
 
@@ -321,145 +309,6 @@ def _get_result_dtype(self, dtype: np.dtype) -> np.dtype:
                 return np.dtype(np.float64)
         return dtype
 
-    @final
-    def _ea_wrap_cython_operation(
-        self,
-        values: ExtensionArray,
-        min_count: int,
-        ngroups: int,
-        comp_ids: np.ndarray,
-        **kwargs,
-    ) -> ArrayLike:
-        """
-        If we have an ExtensionArray, unwrap, call _cython_operation, and
-        re-wrap if appropriate.
-        """
-        if isinstance(values, BaseMaskedArray):
-            return self._masked_ea_wrap_cython_operation(
-                values,
-                min_count=min_count,
-                ngroups=ngroups,
-                comp_ids=comp_ids,
-                **kwargs,
-            )
-
-        elif isinstance(values, Categorical):
-            assert self.how == "rank"  # the only one implemented ATM
-            assert values.ordered  # checked earlier
-            mask = values.isna()
-            npvalues = values._ndarray
-
-            res_values = self._cython_op_ndim_compat(
-                npvalues,
-                min_count=min_count,
-                ngroups=ngroups,
-                comp_ids=comp_ids,
-                mask=mask,
-                **kwargs,
-            )
-
-            # If we ever have more than just "rank" here, we'll need to do
-            #  `if self.how in self.cast_blocklist` like we do for other dtypes.
-            return res_values
-
-        npvalues = self._ea_to_cython_values(values)
-
-        res_values = self._cython_op_ndim_compat(
-            npvalues,
-            min_count=min_count,
-            ngroups=ngroups,
-            comp_ids=comp_ids,
-            mask=None,
-            **kwargs,
-        )
-
-        if self.how in self.cast_blocklist:
-            # i.e. how in ["rank"], since other cast_blocklist methods don't go
-            #  through cython_operation
-            return res_values
-
-        return self._reconstruct_ea_result(values, res_values)
-
-    # TODO: general case implementation overridable by EAs.
-    def _ea_to_cython_values(self, values: ExtensionArray) -> np.ndarray:
-        # GH#43682
-        if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)):
-            # All of the functions implemented here are ordinal, so we can
-            #  operate on the tz-naive equivalents
-            npvalues = values._ndarray.view("M8[ns]")
-        elif isinstance(values.dtype, StringDtype):
-            # StringArray
-            npvalues = values.to_numpy(object, na_value=np.nan)
-        else:
-            raise NotImplementedError(
-                f"function is not implemented for this dtype: {values.dtype}"
-            )
-        return npvalues
-
-    # TODO: general case implementation overridable by EAs.
-    def _reconstruct_ea_result(
-        self, values: ExtensionArray, res_values: np.ndarray
-    ) -> ExtensionArray:
-        """
-        Construct an ExtensionArray result from an ndarray result.
-        """
-        dtype: BaseMaskedDtype | StringDtype
-
-        if isinstance(values.dtype, StringDtype):
-            dtype = values.dtype
-            string_array_cls = dtype.construct_array_type()
-            return string_array_cls._from_sequence(res_values, dtype=dtype)
-
-        elif isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)):
-            # In to_cython_values we took a view as M8[ns]
-            assert res_values.dtype == "M8[ns]"
-            res_values = res_values.view(values._ndarray.dtype)
-            return values._from_backing_data(res_values)
-
-        raise NotImplementedError
-
-    @final
-    def _masked_ea_wrap_cython_operation(
-        self,
-        values: BaseMaskedArray,
-        min_count: int,
-        ngroups: int,
-        comp_ids: np.ndarray,
-        **kwargs,
-    ) -> BaseMaskedArray:
-        """
-        Equivalent of `_ea_wrap_cython_operation`, but optimized for masked EA's
-        and cython algorithms which accept a mask.
-        """
-        orig_values = values
-
-        # libgroupby functions are responsible for NOT altering mask
-        mask = values._mask
-        if self.kind != "aggregate":
-            result_mask = mask.copy()
-        else:
-            result_mask = np.zeros(ngroups, dtype=bool)
-
-        arr = values._data
-
-        res_values = self._cython_op_ndim_compat(
-            arr,
-            min_count=min_count,
-            ngroups=ngroups,
-            comp_ids=comp_ids,
-            mask=mask,
-            result_mask=result_mask,
-            **kwargs,
-        )
-
-        if self.how == "ohlc":
-            arity = self._cython_arity.get(self.how, 1)
-            result_mask = np.tile(result_mask, (arity, 1)).T
-
-        # res_values should already have the correct dtype, we just need to
-        #  wrap in a MaskedArray
-        return orig_values._maybe_mask_result(res_values, result_mask)
-
     @final
     def _cython_op_ndim_compat(
         self,
@@ -614,6 +463,17 @@ def _call_cython_op(
 
         return op_result
 
+    @final
+    def _validate_axis(self, axis: AxisInt, values: ArrayLike) -> None:
+        if values.ndim > 2:
+            raise NotImplementedError("number of dimensions is currently limited to 2")
+        if values.ndim == 2:
+            assert axis == 1, axis
+        elif not is_1d_only_ea_dtype(values.dtype):
+            # Note: it is *not* the case that axis is always 0 for 1-dim values,
+            #  as we can have 1D ExtensionArrays that we need to treat as 2D
+            assert axis == 0
+
     @final
     def cython_operation(
         self,
@@ -628,30 +488,16 @@ def cython_operation(
         """
         Call our cython function, with appropriate pre- and post- processing.
         """
-        if values.ndim > 2:
-            raise NotImplementedError("number of dimensions is currently limited to 2")
-        if values.ndim == 2:
-            assert axis == 1, axis
-        elif not is_1d_only_ea_dtype(values.dtype):
-            # Note: it is *not* the case that axis is always 0 for 1-dim values,
-            #  as we can have 1D ExtensionArrays that we need to treat as 2D
-            assert axis == 0
-
-        dtype = values.dtype
-        is_numeric = is_numeric_dtype(dtype)
+        self._validate_axis(axis, values)
 
         # can we do this operation with our cython functions
         # if not raise NotImplementedError
-        self._disallow_invalid_ops(dtype, is_numeric)
+        self._disallow_invalid_ops(values.dtype)
 
         if not isinstance(values, np.ndarray):
             # i.e. ExtensionArray
-            return self._ea_wrap_cython_operation(
-                values,
-                min_count=min_count,
-                ngroups=ngroups,
-                comp_ids=comp_ids,
-                **kwargs,
+            return values.groupby_op(
+                self, min_count=min_count, ngroups=ngroups, ids=comp_ids, **kwargs
             )
 
         return self._cython_op_ndim_compat(

From 1ea4a723c5e0309f28dc24d049b9e92db5bcb0c2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 3 Feb 2023 22:09:53 -0800
Subject: [PATCH 2/7] REF: move EA-specific checks to EAs

---
 pandas/core/arrays/base.py         | 16 +--------
 pandas/core/arrays/categorical.py  | 14 ++++++++
 pandas/core/arrays/datetimelike.py | 44 ++++++++++++++++++++++++
 pandas/core/arrays/sparse/array.py |  8 +++++
 pandas/core/groupby/ops.py         | 55 ------------------------------
 5 files changed, 67 insertions(+), 70 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 29d3127fee492..83c818039fda8 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1694,19 +1694,10 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
     def groupby_op(
         self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
     ):
-        from pandas.core.arrays import (
-            DatetimeArray,
-            PeriodArray,
-            TimedeltaArray,
-        )
         from pandas.core.arrays.string_ import StringDtype
 
         # GH#43682
-        if isinstance(self, (DatetimeArray, PeriodArray, TimedeltaArray)):
-            # All of the functions implemented here are ordinal, so we can
-            #  operate on the tz-naive equivalents
-            npvalues = self._ndarray.view("M8[ns]")
-        elif isinstance(self.dtype, StringDtype):
+        if isinstance(self.dtype, StringDtype):
             # StringArray
             npvalues = self.to_numpy(object, na_value=np.nan)
         else:
@@ -1733,11 +1724,6 @@ def groupby_op(
             string_array_cls = dtype.construct_array_type()
             return string_array_cls._from_sequence(res_values, dtype=dtype)
 
-        elif isinstance(self, (DatetimeArray, TimedeltaArray, PeriodArray)):
-            # In to_cython_values we took a view as M8[ns]
-            assert res_values.dtype == "M8[ns]"
-            res_values = res_values.view(self._ndarray.dtype)
-            return self._from_backing_data(res_values)
         else:
             raise NotImplementedError
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index ceae5c385967b..e12562d334e01 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2324,6 +2324,20 @@ def _str_get_dummies(self, sep: str = "|"):
     def groupby_op(
         self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
     ):
+
+        how = op.how
+        dtype = self.dtype
+        if how in ["sum", "prod", "cumsum", "cumprod"]:
+            raise TypeError(f"{dtype} type does not support {how} operations")
+        if how in ["min", "max", "rank"] and not dtype.ordered:
+            # raise TypeError instead of NotImplementedError to ensure we
+            #  don't go down a group-by-group path, since in the empty-groups
+            #  case that would fail to raise
+            raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
+        if how not in ["rank"]:
+            # only "rank" is implemented in cython
+            raise NotImplementedError(f"{dtype} dtype not supported")
+
         assert op.how == "rank"  # the only one implemented ATM
         assert self.ordered  # checked earlier
         mask = self.isna()
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 4b26528e6661c..03c525316ed66 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1573,6 +1573,50 @@ def _mode(self, dropna: bool = True):
         npmodes = cast(np.ndarray, npmodes)
         return self._from_backing_data(npmodes)
 
+    # ------------------------------------------------------------------
+    # GroupBy Methods
+
+    def groupby_op(
+        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+    ):
+        dtype = self.dtype
+        how = op.how
+        if dtype.kind == "M":
+            # Adding/multiplying datetimes is not valid
+            if how in ["sum", "prod", "cumsum", "cumprod"]:
+                raise TypeError(f"datetime64 type does not support {how} operations")
+        elif is_period_dtype(dtype):
+            # Adding/multiplying Periods is not valid
+            if how in ["sum", "prod", "cumsum", "cumprod"]:
+                raise TypeError(f"Period type does not support {how} operations")
+        else:
+            # timedeltas we can add but not multiply
+            if how in ["prod", "cumprod"]:
+                raise TypeError(f"timedelta64 type does not support {how} operations")
+
+        # All of the functions implemented here are ordinal, so we can
+        #  operate on the tz-naive equivalents
+        npvalues = self._ndarray.view("M8[ns]")
+
+        res_values = op._cython_op_ndim_compat(
+            npvalues,
+            min_count=min_count,
+            ngroups=ngroups,
+            comp_ids=ids,
+            mask=None,
+            **kwargs,
+        )
+
+        if op.how in op.cast_blocklist:
+            # i.e. how in ["rank"], since other cast_blocklist methods don't go
+            #  through cython_operation
+            return res_values
+
+        # We did a view to M8[ns] above, now we go the other direction
+        assert res_values.dtype == "M8[ns]"
+        res_values = res_values.view(self._ndarray.dtype)
+        return self._from_backing_data(res_values)
+
 
 class DatelikeOps(DatetimeLikeArrayMixin):
     """
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index c8c33d3f52102..dd192e951ee7e 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -1814,6 +1814,14 @@ def _formatter(self, boxed: bool = False):
         # This will infer the correct formatter from the dtype of the values.
         return None
 
+    # ------------------------------------------------------------------------
+    # GroupBy Methods
+
+    def groupby_op(
+        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+    ):
+        raise NotImplementedError(f"{self.dtype} dtype not supported")
+
 
 def _make_sparse(
     arr: np.ndarray,
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 6d959bd7a3a79..a1814e811735f 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -30,7 +30,6 @@
 from pandas._typing import (
     ArrayLike,
     AxisInt,
-    DtypeObj,
     NDFrameT,
     Shape,
     npt,
@@ -50,16 +49,11 @@
     is_1d_only_ea_dtype,
     is_bool_dtype,
     is_complex_dtype,
-    is_datetime64_any_dtype,
     is_float_dtype,
     is_integer_dtype,
     is_numeric_dtype,
-    is_period_dtype,
-    is_sparse,
-    is_timedelta64_dtype,
     needs_i8_conversion,
 )
-from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import (
     isna,
     maybe_fill,
@@ -207,51 +201,6 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
 
         return values
 
-    # TODO: general case implementation overridable by EAs.
-    def _disallow_invalid_ops(self, dtype: DtypeObj):
-        """
-        Check if we can do this operation with our cython functions.
-
-        Raises
-        ------
-        TypeError
-            This is not a valid operation for this dtype.
-        NotImplementedError
-            This may be a valid operation, but does not have a cython implementation.
-        """
-        how = self.how
-
-        if is_numeric_dtype(dtype):
-            # never an invalid op for those dtypes, so return early as fastpath
-            return
-
-        if isinstance(dtype, CategoricalDtype):
-            if how in ["sum", "prod", "cumsum", "cumprod"]:
-                raise TypeError(f"{dtype} type does not support {how} operations")
-            if how in ["min", "max", "rank"] and not dtype.ordered:
-                # raise TypeError instead of NotImplementedError to ensure we
-                #  don't go down a group-by-group path, since in the empty-groups
-                #  case that would fail to raise
-                raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
-            if how not in ["rank"]:
-                # only "rank" is implemented in cython
-                raise NotImplementedError(f"{dtype} dtype not supported")
-
-        elif is_sparse(dtype):
-            raise NotImplementedError(f"{dtype} dtype not supported")
-        elif is_datetime64_any_dtype(dtype):
-            # Adding/multiplying datetimes is not valid
-            if how in ["sum", "prod", "cumsum", "cumprod"]:
-                raise TypeError(f"datetime64 type does not support {how} operations")
-        elif is_period_dtype(dtype):
-            # Adding/multiplying Periods is not valid
-            if how in ["sum", "prod", "cumsum", "cumprod"]:
-                raise TypeError(f"Period type does not support {how} operations")
-        elif is_timedelta64_dtype(dtype):
-            # timedeltas we can add but not multiply
-            if how in ["prod", "cumprod"]:
-                raise TypeError(f"timedelta64 type does not support {how} operations")
-
     def _get_output_shape(self, ngroups: int, values: np.ndarray) -> Shape:
         how = self.how
         kind = self.kind
@@ -490,10 +439,6 @@ def cython_operation(
         """
         self._validate_axis(axis, values)
 
-        # can we do this operation with our cython functions
-        # if not raise NotImplementedError
-        self._disallow_invalid_ops(values.dtype)
-
         if not isinstance(values, np.ndarray):
             # i.e. ExtensionArray
             return values.groupby_op(

From 8645cf199733dace07875da7098c3c7466b5ea58 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 4 Feb 2023 12:45:06 -0800
Subject: [PATCH 3/7] REF: dont pass op to groupby_op

---
 pandas/core/arrays/base.py         | 13 ++++++++++++-
 pandas/core/arrays/categorical.py  | 16 +++++++++++++---
 pandas/core/arrays/datetimelike.py | 15 +++++++++++++--
 pandas/core/arrays/masked.py       | 14 +++++++++++++-
 pandas/core/groupby/ops.py         | 13 ++++++++++++-
 5 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 83c818039fda8..378b3b83899e6 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1692,9 +1692,20 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
     # GroupBy Methods
 
     def groupby_op(
-        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+        self,
+        *,
+        how: str,
+        has_dropped_na: bool,
+        min_count: int,
+        ngroups: int,
+        ids: npt.NDArray[np.intp],
+        **kwargs,
     ):
         from pandas.core.arrays.string_ import StringDtype
+        from pandas.core.groupby.ops import WrappedCythonOp
+
+        kind = WrappedCythonOp.get_kind_from_how(how)
+        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
 
         # GH#43682
         if isinstance(self.dtype, StringDtype):
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index e12562d334e01..4f0277f10ca43 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2322,10 +2322,20 @@ def _str_get_dummies(self, sep: str = "|"):
     # GroupBy Methods
 
     def groupby_op(
-        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+        self,
+        *,
+        how: str,
+        has_dropped_na: bool,
+        min_count: int,
+        ngroups: int,
+        ids: npt.NDArray[np.intp],
+        **kwargs,
     ):
+        from pandas.core.groupby.ops import WrappedCythonOp
+
+        kind = WrappedCythonOp.get_kind_from_how(how)
+        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
 
-        how = op.how
         dtype = self.dtype
         if how in ["sum", "prod", "cumsum", "cumprod"]:
             raise TypeError(f"{dtype} type does not support {how} operations")
@@ -2338,7 +2348,7 @@ def groupby_op(
             # only "rank" is implemented in cython
             raise NotImplementedError(f"{dtype} dtype not supported")
 
-        assert op.how == "rank"  # the only one implemented ATM
+        assert how == "rank"  # the only one implemented ATM
         assert self.ordered  # checked earlier
         mask = self.isna()
         npvalues = self._ndarray
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 03c525316ed66..0557361906da5 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1577,10 +1577,16 @@ def _mode(self, dropna: bool = True):
     # GroupBy Methods
 
     def groupby_op(
-        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+        self,
+        *,
+        how: str,
+        has_dropped_na: bool,
+        min_count: int,
+        ngroups: int,
+        ids: npt.NDArray[np.intp],
+        **kwargs,
     ):
         dtype = self.dtype
-        how = op.how
         if dtype.kind == "M":
             # Adding/multiplying datetimes is not valid
             if how in ["sum", "prod", "cumsum", "cumprod"]:
@@ -1598,6 +1604,11 @@ def groupby_op(
         #  operate on the tz-naive equivalents
         npvalues = self._ndarray.view("M8[ns]")
 
+        from pandas.core.groupby.ops import WrappedCythonOp
+
+        kind = WrappedCythonOp.get_kind_from_how(how)
+        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
+
         res_values = op._cython_op_ndim_compat(
             npvalues,
             min_count=min_count,
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index bf12a9ac01a5a..0a2e99abac13a 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -1388,8 +1388,20 @@ def _accumulate(
     # GroupBy Methods
 
     def groupby_op(
-        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+        self,
+        *,
+        how: str,
+        has_dropped_na: bool,
+        min_count: int,
+        ngroups: int,
+        ids: npt.NDArray[np.intp],
+        **kwargs,
     ):
+        from pandas.core.groupby.ops import WrappedCythonOp
+
+        kind = WrappedCythonOp.get_kind_from_how(how)
+        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
+
         # libgroupby functions are responsible for NOT altering mask
         mask = self._mask
         if op.kind != "aggregate":
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index a1814e811735f..119252d9b5dcc 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -128,6 +128,12 @@ def __init__(self, kind: str, how: str, has_dropped_na: bool) -> None:
 
     _cython_arity = {"ohlc": 4}  # OHLC
 
+    @classmethod
+    def get_kind_from_how(cls, how: str) -> str:
+        if how in cls._CYTHON_FUNCTIONS["aggregate"]:
+            return "aggregate"
+        return "transform"
+
     # Note: we make this a classmethod and pass kind+how so that caching
     #  works at the class level and not the instance level
     @classmethod
@@ -442,7 +448,12 @@ def cython_operation(
         if not isinstance(values, np.ndarray):
             # i.e. ExtensionArray
             return values.groupby_op(
-                self, min_count=min_count, ngroups=ngroups, ids=comp_ids, **kwargs
+                how=self.how,
+                has_dropped_na=self.has_dropped_na,
+                min_count=min_count,
+                ngroups=ngroups,
+                ids=comp_ids,
+                **kwargs,
             )
 
         return self._cython_op_ndim_compat(

From ea40255c9d6b8136d27fd755e719103d94c28317 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 4 Feb 2023 15:20:22 -0800
Subject: [PATCH 4/7] mypy fixup

---
 pandas/core/arrays/sparse/array.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index dd192e951ee7e..787c2d78561f2 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -1818,7 +1818,14 @@ def _formatter(self, boxed: bool = False):
     # GroupBy Methods
 
     def groupby_op(
-        self, op, *, min_count: int, ngroups: int, ids: npt.NDArray[np.intp], **kwargs
+        self,
+        *,
+        how: str,
+        has_dropped_na: bool,
+        min_count: int,
+        ngroups: int,
+        ids: npt.NDArray[np.intp],
+        **kwargs,
     ):
         raise NotImplementedError(f"{self.dtype} dtype not supported")
 

From a2e7e64e3e8433bdb3998dfa42f683f91e9b0a6a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 6 Feb 2023 15:47:48 -0800
Subject: [PATCH 5/7] groupby_op -> _groupby_op

---
 pandas/core/arrays/base.py         | 29 +++++++++++++++++++++++++++--
 pandas/core/arrays/categorical.py  |  2 +-
 pandas/core/arrays/datetimelike.py |  2 +-
 pandas/core/arrays/masked.py       |  2 +-
 pandas/core/arrays/sparse/array.py |  2 +-
 pandas/core/groupby/ops.py         |  2 +-
 6 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 378b3b83899e6..37ae7f2c8e2f8 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1691,7 +1691,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
     # ------------------------------------------------------------------------
     # GroupBy Methods
 
-    def groupby_op(
+    def _groupby_op(
         self,
         *,
         how: str,
@@ -1700,7 +1700,32 @@ def groupby_op(
         ngroups: int,
         ids: npt.NDArray[np.intp],
         **kwargs,
-    ):
+    ) -> ArrayLike:
+        """
+        Dispatch GroupBy reduction or transformation operation.
+
+        This is an *experimental* API to allow ExtensionArray authors to implement
+        reductions and transformations. The API is subject to change.
+
+        Parameters
+        ----------
+        how : {'sum', 'prod', 'min', 'max', 'mean', 'median',
+               'median', 'var', 'nth', 'last', 'ohlc',
+               'cumprod', 'cumsum', 'cummin', 'cummax', 'rank'}
+        has_dropped_na : bool
+        min_count : int
+        ngroups : int
+        ids : np.ndarray[np.intp]
+            ids[i] gives the integer label for the group that self[i] belongs to.
+        **kwargs : operation-specific
+            'var' -> ['ddof']
+            'cumprod', 'cumsum', 'cummin', 'cummax' -> ['skipna']
+            'rank' -> ['ties_method', 'ascending', 'na_option', 'pct']
+
+        Returns
+        -------
+        np.ndarray or ExtensionArray
+        """
         from pandas.core.arrays.string_ import StringDtype
         from pandas.core.groupby.ops import WrappedCythonOp
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 4f0277f10ca43..a94f2708014ea 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2321,7 +2321,7 @@ def _str_get_dummies(self, sep: str = "|"):
     # ------------------------------------------------------------------------
     # GroupBy Methods
 
-    def groupby_op(
+    def _groupby_op(
         self,
         *,
         how: str,
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 0557361906da5..a4b6c0cff790f 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1576,7 +1576,7 @@ def _mode(self, dropna: bool = True):
     # ------------------------------------------------------------------
     # GroupBy Methods
 
-    def groupby_op(
+    def _groupby_op(
         self,
         *,
         how: str,
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 0a2e99abac13a..f03846de1e9c2 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -1387,7 +1387,7 @@ def _accumulate(
     # ------------------------------------------------------------------
     # GroupBy Methods
 
-    def groupby_op(
+    def _groupby_op(
         self,
         *,
         how: str,
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 787c2d78561f2..7a3e1272d98aa 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -1817,7 +1817,7 @@ def _formatter(self, boxed: bool = False):
     # ------------------------------------------------------------------------
     # GroupBy Methods
 
-    def groupby_op(
+    def _groupby_op(
         self,
         *,
         how: str,
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 119252d9b5dcc..0f7fa4806e0ec 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -447,7 +447,7 @@ def cython_operation(
 
         if not isinstance(values, np.ndarray):
             # i.e. ExtensionArray
-            return values.groupby_op(
+            return values._groupby_op(
                 how=self.how,
                 has_dropped_na=self.has_dropped_na,
                 min_count=min_count,

From d80c0b2a32c1f4c8c7d5963cb9b7a28be03b9544 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 22 Mar 2023 15:35:41 -0700
Subject: [PATCH 6/7] mypy fixup

---
 pandas/core/arrays/base.py        | 7 ++++---
 pandas/core/arrays/categorical.py | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 0a8b0382c243d..51e7785c178ac 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1743,8 +1743,8 @@ def _groupby_op(
 
         Parameters
         ----------
-        how : {'sum', 'prod', 'min', 'max', 'mean', 'median',
-               'median', 'var', 'nth', 'last', 'ohlc',
+        how : {'any', 'all', 'sum', 'prod', 'min', 'max', 'mean', 'median',
+               'var', 'std', 'sem', 'nth', 'last', 'ohlc',
                'cumprod', 'cumsum', 'cummin', 'cummax', 'rank'}
         has_dropped_na : bool
         min_count : int
@@ -1752,7 +1752,8 @@ def _groupby_op(
         ids : np.ndarray[np.intp]
             ids[i] gives the integer label for the group that self[i] belongs to.
         **kwargs : operation-specific
-            'var' -> ['ddof']
+            'any', 'all' -> ['skipna']
+            'var', 'std', 'sem' -> ['ddof']
             'cumprod', 'cumsum', 'cummin', 'cummax' -> ['skipna']
             'rank' -> ['ties_method', 'ascending', 'na_option', 'pct']
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 66c4fa8b8e195..c0179642461aa 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2388,7 +2388,7 @@ def _groupby_op(
             npvalues = self._ndarray
         elif how in ["first", "last", "min", "max"]:
             npvalues = self._ndarray
-            result_mask = np.zeros(ngroups, dtype=np.uint8)
+            result_mask = np.zeros(ngroups, dtype=bool)
         else:
             # any/all
             npvalues = self.astype(bool)

From fab8725b37b9b152d7f2920f16e1f36f2cf4687b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 31 Mar 2023 13:28:56 -0700
Subject: [PATCH 7/7] mypy fixup

---
 pandas/core/dtypes/concat.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index d54b37c06c6bd..1ec7e717839bf 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -79,7 +79,12 @@ def concat_compat(
             # e.g. DatetimeArray
             # NB: We are assuming here that ensure_wrapped_if_arraylike has
             #  been called where relevant.
-            return obj._concat_same_type(to_concat_eas, axis=axis)
+            return obj._concat_same_type(
+                # error: Unexpected keyword argument "axis" for "_concat_same_type"
+                # of "ExtensionArray"
+                to_concat_eas,
+                axis=axis,  # type: ignore[call-arg]
+            )
 
     # filter empty arrays
     # 1-d dtypes always are included here