From 777c35c83e7ad35fa8f665497515be633c109b45 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 1 Feb 2023 11:08:27 -0800
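Subject: [PATCH 1/6] REF: implement groupby_any_all, groupby_std

Add groupby_any_all and groupby_std methods to the array base class in
pandas/core/arrays/base.py. Each is a copy of the blk_func body from
_get_cythonized_result in pandas/core/groupby/groupby.py, specialised to
one case: group_any_all with an int8 working dtype, and group_var with a
float64 working dtype plus a counts array.

blk_func now dispatches to these methods when it receives an
ExtensionArray; other inputs still go through the existing code path.
The labels=ids binding moves from outside blk_func to inside it.
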
---
 pandas/core/arrays/base.py     | 181 ++++++++++++++++++++++++++++++++-
 pandas/core/groupby/groupby.py |  18 +++-
 2 files changed, 195 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index c261a41e1e77e..0363a749572d7 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -8,6 +8,7 @@
 """
 from __future__ import annotations
 
+from functools import partial
 import operator
 from typing import (
     TYPE_CHECKING,
@@ -24,7 +25,10 @@
 
 import numpy as np
 
-from pandas._libs import lib
+from pandas._libs import (
+    groupby as libgroupby,
+    lib,
+)
 from pandas._typing import (
     ArrayLike,
     AstypeArg,
@@ -58,6 +62,7 @@
     is_datetime64_dtype,
     is_dtype_equal,
     is_list_like,
+    is_object_dtype,
     is_scalar,
     is_timedelta64_dtype,
     pandas_dtype,
@@ -1688,6 +1693,180 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
 
         return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
 
+    # ------------------------------------------------------------------------
+    # GroupBy Methods
+
+    def groupby_any_all(
+        self, *, ngroups: int, ids: npt.NDArray[np.intp], val_test: str, skipna: bool
+    ):
+        values = self
+        how = "any_all"
+        base_func = libgroupby.group_any_all
+        cython_dtype = np.dtype(np.int8)
+        needs_counts = False
+
+        from pandas.core.arrays import (
+            BaseMaskedArray,
+            BooleanArray,
+        )
+
+        def pre_processing(vals: ArrayLike) -> tuple[np.ndarray, type]:
+            if is_object_dtype(vals.dtype) and skipna:
+                # GH#37501: don't raise on pd.NA when skipna=True
+                mask = isna(vals)
+                if mask.any():
+                    # mask on original values computed separately
+                    vals = vals.copy()
+                    vals[mask] = True
+            elif isinstance(vals, BaseMaskedArray):
+                vals = vals._data
+            vals = vals.astype(bool, copy=False)
+            return vals.view(np.int8), bool
+
+        def post_processing(
+            result: np.ndarray,
+            inference: type,
+            nullable: bool = False,
+        ) -> ArrayLike:
+            if nullable:
+                return BooleanArray(result.astype(bool, copy=False), result == -1)
+            else:
+                return result.astype(inference, copy=False)
+
+        values = values.T
+        ncols = 1 if values.ndim == 1 else values.shape[1]
+
+        result: ArrayLike
+        result = np.zeros(ngroups * ncols, dtype=cython_dtype)
+        result = result.reshape((ngroups, ncols))
+
+        func = partial(base_func, out=result, labels=ids)
+
+        inferences = None
+
+        if needs_counts:
+            counts = np.zeros(ngroups, dtype=np.int64)
+            func = partial(func, counts=counts)
+
+        vals = values
+        if pre_processing:
+            vals, inferences = pre_processing(vals)
+
+        vals = vals.astype(cython_dtype, copy=False)
+        if vals.ndim == 1:
+            vals = vals.reshape((-1, 1))
+        func = partial(func, values=vals)
+
+        if how != "std" or isinstance(values, BaseMaskedArray):
+            mask = isna(values).view(np.uint8)
+            if mask.ndim == 1:
+                mask = mask.reshape(-1, 1)
+            func = partial(func, mask=mask)
+
+        if how != "std":
+            is_nullable = isinstance(values, BaseMaskedArray)
+            func = partial(func, nullable=is_nullable)
+
+        elif isinstance(values, BaseMaskedArray):
+            result_mask = np.zeros(result.shape, dtype=np.bool_)
+            func = partial(func, result_mask=result_mask)
+
+        func(val_test=val_test, skipna=skipna)  # Call func to modify result in place
+
+        if values.ndim == 1:
+            assert result.shape[1] == 1, result.shape
+            result = result[:, 0]
+
+        if post_processing:
+            pp_kwargs: dict[str, bool | np.ndarray] = {}
+            pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)
+            if how == "std" and pp_kwargs["nullable"]:
+                pp_kwargs["result_mask"] = result_mask
+
+            result = post_processing(result, inferences, **pp_kwargs)
+
+        return result.T
+
+    def groupby_std(self, *, ngroups: int, ids: npt.NDArray[np.intp], ddof: int):
+        values = self
+        how = "std"
+        base_func = libgroupby.group_var
+        cython_dtype = np.dtype(np.float64)
+        needs_counts = True
+
+        from pandas.core.arrays import (
+            BaseMaskedArray,
+            FloatingArray,
+        )
+
+        def pre_processing(values):
+            if isinstance(values, BaseMaskedArray):
+                return values._data, None
+            return values, None
+
+        def post_processing(
+            vals, inference, nullable: bool = False, result_mask=None
+        ) -> ArrayLike:
+            if nullable:
+                if result_mask.ndim == 2:
+                    result_mask = result_mask[:, 0]
+                return FloatingArray(np.sqrt(vals), result_mask.view(np.bool_))
+            return np.sqrt(vals)
+
+        values = values.T
+        ncols = 1 if values.ndim == 1 else values.shape[1]
+
+        result: ArrayLike
+        result = np.zeros(ngroups * ncols, dtype=cython_dtype)
+        result = result.reshape((ngroups, ncols))
+
+        func = partial(base_func, out=result, labels=ids)
+
+        inferences = None
+
+        if needs_counts:
+            counts = np.zeros(ngroups, dtype=np.int64)
+            func = partial(func, counts=counts)
+
+        vals = values
+        if pre_processing:
+            vals, inferences = pre_processing(vals)
+
+        vals = vals.astype(cython_dtype, copy=False)
+        if vals.ndim == 1:
+            vals = vals.reshape((-1, 1))
+        func = partial(func, values=vals)
+
+        if how != "std" or isinstance(values, BaseMaskedArray):
+            mask = isna(values).view(np.uint8)
+            if mask.ndim == 1:
+                mask = mask.reshape(-1, 1)
+            func = partial(func, mask=mask)
+
+        if how != "std":
+            is_nullable = isinstance(values, BaseMaskedArray)
+            func = partial(func, nullable=is_nullable)
+
+        elif isinstance(values, BaseMaskedArray):
+            result_mask = np.zeros(result.shape, dtype=np.bool_)
+            func = partial(func, result_mask=result_mask)
+
+        func(ddof=ddof)  # Call func to modify result in place
+
+        if values.ndim == 1:
+            assert result.shape[1] == 1, result.shape
+            result = result[:, 0]
+
+        if post_processing:
+            pp_kwargs: dict[str, bool | np.ndarray] = {}
+            pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)
+            if how == "std" and pp_kwargs["nullable"]:
+                pp_kwargs["result_mask"] = result_mask
+
+            result = post_processing(result, inferences, **pp_kwargs)
+
+        return result.T
+
 
 class ExtensionArraySupportsAnyAll(ExtensionArray):
     def any(self, *, skipna: bool = True) -> bool:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 696f924e31179..c8cd5835f11d0 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3719,9 +3719,21 @@ def _get_cythonized_result(
 
         ids, _, ngroups = grouper.group_info
 
-        base_func = partial(base_func, labels=ids)
-
         def blk_func(values: ArrayLike) -> ArrayLike:
+
+            if isinstance(values, ExtensionArray):
+                if how == "std":
+                    return values.groupby_std(
+                        ngroups=ngroups, ids=ids, ddof=kwargs["ddof"]
+                    )
+                elif how == "any_all":
+                    return values.groupby_any_all(
+                        ngroups=ngroups,
+                        ids=ids,
+                        skipna=kwargs["skipna"],
+                        val_test=kwargs["val_test"],
+                    )
+
             values = values.T
             ncols = 1 if values.ndim == 1 else values.shape[1]
 
@@ -3729,7 +3741,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
             result = np.zeros(ngroups * ncols, dtype=cython_dtype)
             result = result.reshape((ngroups, ncols))
 
-            func = partial(base_func, out=result)
+            func = partial(base_func, out=result, labels=ids)
 
             inferences = None
 

From cfa3bfd612095be67841e514fa40310997db2945 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 1 Feb 2023 14:18:17 -0800
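Subject: [PATCH 2/6] Implement MaskedArray methods

Add masked-array versions of groupby_any_all and groupby_std in
pandas/core/arrays/masked.py. They pass the underlying _data and the
isna() mask to the Cython kernels and wrap the output in BooleanArray
(any/all, nullable=True) or FloatingArray (std, using result_mask).

With the masked case handled there, drop the BaseMaskedArray branches
from the base-class methods in pandas/core/arrays/base.py and from
blk_func and the pre/post-processing helpers in
pandas/core/groupby/groupby.py; those now always use nullable=False.
Also declare the nullable parameter of group_any_all in the groupby.pyi
stub.

---
Review note: in the base.py groupby_any_all added here, the value
returned by result.astype(bool, copy=False) is discarded, so the method
returns the int8 array. PATCH 3/6 changes the line to assign the result.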
 pandas/_libs/groupby.pyi       |   1 +
 pandas/core/arrays/base.py     | 195 +++++++++------------------------
 pandas/core/arrays/masked.py   |  62 +++++++++++
 pandas/core/groupby/groupby.py |  43 +-------
 4 files changed, 118 insertions(+), 183 deletions(-)

diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi
index 0f72b2b72141f..cd87c406d7f32 100644
--- a/pandas/_libs/groupby.pyi
+++ b/pandas/_libs/groupby.pyi
@@ -55,6 +55,7 @@ def group_any_all(
     mask: np.ndarray,  # const uint8_t[::1]
     val_test: Literal["any", "all"],
     skipna: bool,
+    nullable: bool = ...,
 ) -> None: ...
 def group_sum(
     out: np.ndarray,  # complexfloatingintuint_t[:, ::1]
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 0363a749572d7..d2b154c298b6f 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -8,7 +8,6 @@
 """
 from __future__ import annotations
 
-from functools import partial
 import operator
 from typing import (
     TYPE_CHECKING,
@@ -1697,174 +1696,78 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
     # GroupBy Methods
 
     def groupby_any_all(
-        self, *, ngroups: int, ids: npt.NDArray[np.intp], val_test: str, skipna: bool
+        self,
+        *,
+        ngroups: int,
+        ids: npt.NDArray[np.intp],
+        val_test: Literal["any", "all"],
+        skipna: bool,
     ):
-        values = self
-        how = "any_all"
-        base_func = libgroupby.group_any_all
-        cython_dtype = np.dtype(np.int8)
-        needs_counts = False
-
-        from pandas.core.arrays import (
-            BaseMaskedArray,
-            BooleanArray,
-        )
-
-        def pre_processing(vals: ArrayLike) -> tuple[np.ndarray, type]:
-            if is_object_dtype(vals.dtype) and skipna:
-                # GH#37501: don't raise on pd.NA when skipna=True
-                mask = isna(vals)
-                if mask.any():
-                    # mask on original values computed separately
-                    vals = vals.copy()
-                    vals[mask] = True
-            elif isinstance(vals, BaseMaskedArray):
-                vals = vals._data
-            vals = vals.astype(bool, copy=False)
-            return vals.view(np.int8), bool
-
-        def post_processing(
-            result: np.ndarray,
-            inference: type,
-            nullable: bool = False,
-        ) -> ArrayLike:
-            if nullable:
-                return BooleanArray(result.astype(bool, copy=False), result == -1)
-            else:
-                return result.astype(inference, copy=False)
-
-        values = values.T
+        values = self.T
         ncols = 1 if values.ndim == 1 else values.shape[1]
 
-        result: ArrayLike
-        result = np.zeros(ngroups * ncols, dtype=cython_dtype)
-        result = result.reshape((ngroups, ncols))
-
-        func = partial(base_func, out=result, labels=ids)
-
-        inferences = None
+        result = np.zeros((ngroups, ncols), dtype=np.int8)
 
-        if needs_counts:
-            counts = np.zeros(ngroups, dtype=np.int64)
-            func = partial(func, counts=counts)
+        obj = values
+        if is_object_dtype(self.dtype) and skipna:
+            # GH#37501: don't raise on pd.NA when skipna=True
+            mask = self.isna()
+            if mask.any():
+                # mask on original values computed separately
+                obj = obj.copy()
+                obj[mask] = True
 
-        vals = values
-        if pre_processing:
-            vals, inferences = pre_processing(vals)
-
-        vals = vals.astype(cython_dtype, copy=False)
+        vals = obj.astype(bool, copy=False)
+        vals = vals.view(np.int8)
         if vals.ndim == 1:
-            vals = vals.reshape((-1, 1))
-        func = partial(func, values=vals)
-
-        if how != "std" or isinstance(values, BaseMaskedArray):
-            mask = isna(values).view(np.uint8)
-            if mask.ndim == 1:
-                mask = mask.reshape(-1, 1)
-            func = partial(func, mask=mask)
-
-        if how != "std":
-            is_nullable = isinstance(values, BaseMaskedArray)
-            func = partial(func, nullable=is_nullable)
-
-        elif isinstance(values, BaseMaskedArray):
-            result_mask = np.zeros(result.shape, dtype=np.bool_)
-            func = partial(func, result_mask=result_mask)
-
-        func(val_test=val_test, skipna=skipna)  # Call func to modify result in place
+            vals = vals.reshape(-1, 1)
+
+        mask = isna(values).view(np.uint8)
+        if mask.ndim == 1:
+            mask = mask.reshape(-1, 1)
+
+        # Call func to modify result in place
+        libgroupby.group_any_all(
+            out=result,
+            labels=ids,
+            values=vals,
+            val_test=val_test,
+            skipna=skipna,
+            mask=mask,
+            nullable=False,
+        )
 
         if values.ndim == 1:
             assert result.shape[1] == 1, result.shape
             result = result[:, 0]
 
-        if post_processing:
-            pp_kwargs: dict[str, bool | np.ndarray] = {}
-            pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)
-            if how == "std" and pp_kwargs["nullable"]:
-                pp_kwargs["result_mask"] = result_mask
-
-            result = post_processing(result, inferences, **pp_kwargs)
-
+        result.astype(bool, copy=False)
         return result.T
 
     def groupby_std(self, *, ngroups: int, ids: npt.NDArray[np.intp], ddof: int):
-        values = self
-        how = "std"
-        base_func = libgroupby.group_var
         cython_dtype = np.dtype(np.float64)
-        needs_counts = True
 
-        from pandas.core.arrays import (
-            BaseMaskedArray,
-            FloatingArray,
-        )
-
-        def pre_processing(values):
-            if isinstance(values, BaseMaskedArray):
-                return values._data, None
-            return values, None
-
-        def post_processing(
-            vals, inference, nullable: bool = False, result_mask=None
-        ) -> ArrayLike:
-            if nullable:
-                if result_mask.ndim == 2:
-                    result_mask = result_mask[:, 0]
-                return FloatingArray(np.sqrt(vals), result_mask.view(np.bool_))
-            return np.sqrt(vals)
-
-        values = values.T
-        ncols = 1 if values.ndim == 1 else values.shape[1]
-
-        result: ArrayLike
-        result = np.zeros(ngroups * ncols, dtype=cython_dtype)
-        result = result.reshape((ngroups, ncols))
+        ncols = 1 if self.ndim == 1 else self.shape[0]
 
-        func = partial(base_func, out=result, labels=ids)
+        result = np.zeros((ngroups, ncols), dtype=cython_dtype)
+        counts = np.zeros(ngroups, dtype=np.int64)
 
-        inferences = None
-
-        if needs_counts:
-            counts = np.zeros(ngroups, dtype=np.int64)
-            func = partial(func, counts=counts)
-
-        vals = values
-        if pre_processing:
-            vals, inferences = pre_processing(vals)
-
-        vals = vals.astype(cython_dtype, copy=False)
-        if vals.ndim == 1:
-            vals = vals.reshape((-1, 1))
-        func = partial(func, values=vals)
+        vals = self.astype(cython_dtype, copy=False)
 
-        if how != "std" or isinstance(values, BaseMaskedArray):
-            mask = isna(values).view(np.uint8)
-            if mask.ndim == 1:
-                mask = mask.reshape(-1, 1)
-            func = partial(func, mask=mask)
-
-        if how != "std":
-            is_nullable = isinstance(values, BaseMaskedArray)
-            func = partial(func, nullable=is_nullable)
-
-        elif isinstance(values, BaseMaskedArray):
-            result_mask = np.zeros(result.shape, dtype=np.bool_)
-            func = partial(func, result_mask=result_mask)
-
-        func(ddof=ddof)  # Call func to modify result in place
+        # Call func to modify result in place
+        libgroupby.group_var(
+            out=result,
+            labels=ids,
+            values=np.atleast_2d(vals).T,
+            counts=counts,
+            ddof=ddof,
+        )
 
-        if values.ndim == 1:
+        if self.ndim == 1:
             assert result.shape[1] == 1, result.shape
             result = result[:, 0]
 
-        if post_processing:
-            pp_kwargs: dict[str, bool | np.ndarray] = {}
-            pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)
-            if how == "std" and pp_kwargs["nullable"]:
-                pp_kwargs["result_mask"] = result_mask
-
-            result = post_processing(result, inferences, **pp_kwargs)
-
+        result = np.sqrt(result)
         return result.T
 
 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 8324d4b2618f1..205c8ef0ee581 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -14,6 +14,7 @@
 import numpy as np
 
 from pandas._libs import (
+    groupby as libgroupby,
     lib,
     missing as libmissing,
 )
@@ -1383,3 +1384,64 @@ def _accumulate(
         data, mask = op(data, mask, skipna=skipna, **kwargs)
 
         return type(self)(data, mask, copy=False)
+
+    # ------------------------------------------------------------------------
+    # GroupBy Methods
+
+    def groupby_any_all(
+        self,
+        *,
+        ngroups: int,
+        ids: npt.NDArray[np.intp],
+        val_test: Literal["any", "all"],
+        skipna: bool,
+    ):
+        from pandas.core.arrays import BooleanArray
+
+        result = np.zeros(ngroups * 1, dtype=np.int8).reshape(-1, 1)
+        vals = self._data.astype(bool, copy=False).view(np.int8).reshape(-1, 1)
+        mask = self.isna().view(np.uint8).reshape(-1, 1)
+
+        # Call func to modify result in place
+        libgroupby.group_any_all(
+            out=result,
+            labels=ids,
+            values=vals,
+            mask=mask,
+            val_test=val_test,
+            skipna=skipna,
+            nullable=True,
+        )
+
+        assert result.shape[1] == 1, result.shape
+        result = result[:, 0]
+
+        return BooleanArray(result.astype(bool, copy=False), result == -1)
+
+    def groupby_std(self, *, ngroups: int, ids: npt.NDArray[np.intp], ddof: int):
+        from pandas.core.arrays import FloatingArray
+
+        result = np.zeros(ngroups * 1, dtype=np.float64).reshape(-1, 1)
+        counts = np.zeros(ngroups, dtype=np.int64)
+        vals = self._data.astype(np.float64, copy=False).reshape(-1, 1)
+        mask = self.isna().view(np.uint8).reshape(-1, 1)
+        result_mask = np.zeros(result.shape, dtype=np.bool_)
+
+        # Call func to modify result in place
+        libgroupby.group_var(
+            out=result,
+            labels=ids,
+            values=vals,
+            mask=mask,
+            counts=counts,
+            result_mask=result_mask,
+            ddof=ddof,
+        )
+
+        assert result.shape[1] == 1, result.shape
+        result = result[:, 0]
+
+        assert result_mask.shape[1] == 1, result_mask.shape
+        result_mask = result_mask[:, 0]
+
+        return FloatingArray(np.sqrt(result), result_mask.view(np.bool_))
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c8cd5835f11d0..54ccdacc8cb5a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -97,7 +97,6 @@ class providing the base-class of operations.
 from pandas.core._numba import executor
 from pandas.core.arrays import (
     BaseMaskedArray,
-    BooleanArray,
     Categorical,
     ExtensionArray,
     FloatingArray,
@@ -1808,8 +1807,6 @@ def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
                     # mask on original values computed separately
                     vals = vals.copy()
                     vals[mask] = True
-            elif isinstance(vals, BaseMaskedArray):
-                vals = vals._data
             vals = vals.astype(bool, copy=False)
             return vals.view(np.int8), bool
 
@@ -1818,10 +1815,7 @@ def result_to_bool(
             inference: type,
             nullable: bool = False,
         ) -> ArrayLike:
-            if nullable:
-                return BooleanArray(result.astype(bool, copy=False), result == -1)
-            else:
-                return result.astype(inference, copy=False)
+            return result.astype(inference, copy=False)
 
         return self._get_cythonized_result(
             libgroupby.group_any_all,
@@ -2105,18 +2099,9 @@ def std(
                     f"numeric_only={numeric_only} and dtype {self.obj.dtype}"
                 )
 
-            def _preprocessing(values):
-                if isinstance(values, BaseMaskedArray):
-                    return values._data, None
-                return values, None
-
             def _postprocessing(
                 vals, inference, nullable: bool = False, result_mask=None
             ) -> ArrayLike:
-                if nullable:
-                    if result_mask.ndim == 2:
-                        result_mask = result_mask[:, 0]
-                    return FloatingArray(np.sqrt(vals), result_mask.view(np.bool_))
                 return np.sqrt(vals)
 
             result = self._get_cythonized_result(
@@ -2124,7 +2109,7 @@ def _postprocessing(
                 cython_dtype=np.dtype(np.float64),
                 numeric_only=numeric_only,
                 needs_counts=True,
-                pre_processing=_preprocessing,
+                pre_processing=None,
                 post_processing=_postprocessing,
                 ddof=ddof,
                 how="std",
@@ -3737,9 +3722,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
             values = values.T
             ncols = 1 if values.ndim == 1 else values.shape[1]
 
-            result: ArrayLike
-            result = np.zeros(ngroups * ncols, dtype=cython_dtype)
-            result = result.reshape((ngroups, ncols))
+            result = np.zeros((ngroups, ncols), dtype=cython_dtype)
 
             func = partial(base_func, out=result, labels=ids)
 
@@ -3758,19 +3741,12 @@ def blk_func(values: ArrayLike) -> ArrayLike:
                 vals = vals.reshape((-1, 1))
             func = partial(func, values=vals)
 
-            if how != "std" or isinstance(values, BaseMaskedArray):
+            if how != "std":
                 mask = isna(values).view(np.uint8)
                 if mask.ndim == 1:
                     mask = mask.reshape(-1, 1)
                 func = partial(func, mask=mask)
-
-            if how != "std":
-                is_nullable = isinstance(values, BaseMaskedArray)
-                func = partial(func, nullable=is_nullable)
-
-            elif isinstance(values, BaseMaskedArray):
-                result_mask = np.zeros(result.shape, dtype=np.bool_)
-                func = partial(func, result_mask=result_mask)
+                func = partial(func, nullable=False)
 
             func(**kwargs)  # Call func to modify result in place
 
@@ -3778,14 +3754,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
                 assert result.shape[1] == 1, result.shape
                 result = result[:, 0]
 
-            if post_processing:
-                pp_kwargs: dict[str, bool | np.ndarray] = {}
-                pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)
-                if how == "std" and pp_kwargs["nullable"]:
-                    pp_kwargs["result_mask"] = result_mask
-
-                result = post_processing(result, inferences, **pp_kwargs)
-
+            result = post_processing(result, inferences, nullable=False)
             return result.T
 
         obj = self._obj_with_exclusions

From 87ade9fa4e0b523582a3e1e1476c94ed9f948219 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 1 Feb 2023 15:29:08 -0800
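Subject: [PATCH 3/6] REF: dispatch to PandasArray

Make blk_func in _get_cythonized_result handle only ExtensionArray
input: a non-ExtensionArray block is wrapped in PandasArray and passed
back through blk_func, and an unrecognised `how` raises
NotImplementedError. The generic ndarray code path is removed, along
with the base_func, cython_dtype, needs_counts, pre_processing and
post_processing parameters and the helpers that _bool_agg and std passed
for them.

In pandas/core/arrays/base.py, groupby_any_all now operates on self
rather than self.T, reshapes values and mask with np.atleast_2d(...).T,
and assigns the result of the final astype(bool) call.
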
---
 pandas/core/arrays/base.py     |  24 +++-----
 pandas/core/groupby/groupby.py | 105 ++-------------------------------
 2 files changed, 14 insertions(+), 115 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index d2b154c298b6f..3b2a73d493316 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1703,12 +1703,10 @@ def groupby_any_all(
         val_test: Literal["any", "all"],
         skipna: bool,
     ):
-        values = self.T
-        ncols = 1 if values.ndim == 1 else values.shape[1]
-
+        ncols = 1 if self.ndim == 1 else self.shape[0]
         result = np.zeros((ngroups, ncols), dtype=np.int8)
 
-        obj = values
+        obj = self
         if is_object_dtype(self.dtype) and skipna:
             # GH#37501: don't raise on pd.NA when skipna=True
             mask = self.isna()
@@ -1717,31 +1715,25 @@ def groupby_any_all(
                 obj = obj.copy()
                 obj[mask] = True
 
-        vals = obj.astype(bool, copy=False)
-        vals = vals.view(np.int8)
-        if vals.ndim == 1:
-            vals = vals.reshape(-1, 1)
-
-        mask = isna(values).view(np.uint8)
-        if mask.ndim == 1:
-            mask = mask.reshape(-1, 1)
+        vals = obj.astype(bool, copy=False).view(np.int8)
+        mask = self.isna().view(np.uint8)
 
         # Call func to modify result in place
         libgroupby.group_any_all(
             out=result,
             labels=ids,
-            values=vals,
+            values=np.atleast_2d(vals).T,
+            mask=np.atleast_2d(mask).T,
             val_test=val_test,
             skipna=skipna,
-            mask=mask,
             nullable=False,
         )
 
-        if values.ndim == 1:
+        if self.ndim == 1:
             assert result.shape[1] == 1, result.shape
             result = result[:, 0]
 
-        result.astype(bool, copy=False)
+        result = result.astype(bool, copy=False)
         return result.T
 
     def groupby_std(self, *, ngroups: int, ids: npt.NDArray[np.intp], ddof: int):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 54ccdacc8cb5a..744c26977982b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -100,6 +100,7 @@ class providing the base-class of operations.
     Categorical,
     ExtensionArray,
     FloatingArray,
+    PandasArray,
 )
 from pandas.core.base import (
     PandasObject,
@@ -1798,31 +1799,8 @@ def _bool_agg(self, val_test: Literal["any", "all"], skipna: bool):
         """
         Shared func to call any / all Cython GroupBy implementations.
         """
-
-        def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
-            if is_object_dtype(vals.dtype) and skipna:
-                # GH#37501: don't raise on pd.NA when skipna=True
-                mask = isna(vals)
-                if mask.any():
-                    # mask on original values computed separately
-                    vals = vals.copy()
-                    vals[mask] = True
-            vals = vals.astype(bool, copy=False)
-            return vals.view(np.int8), bool
-
-        def result_to_bool(
-            result: np.ndarray,
-            inference: type,
-            nullable: bool = False,
-        ) -> ArrayLike:
-            return result.astype(inference, copy=False)
-
         return self._get_cythonized_result(
-            libgroupby.group_any_all,
             numeric_only=False,
-            cython_dtype=np.dtype(np.int8),
-            pre_processing=objs_to_bool,
-            post_processing=result_to_bool,
             val_test=val_test,
             skipna=skipna,
         )
@@ -2099,18 +2077,8 @@ def std(
                     f"numeric_only={numeric_only} and dtype {self.obj.dtype}"
                 )
 
-            def _postprocessing(
-                vals, inference, nullable: bool = False, result_mask=None
-            ) -> ArrayLike:
-                return np.sqrt(vals)
-
             result = self._get_cythonized_result(
-                libgroupby.group_var,
-                cython_dtype=np.dtype(np.float64),
                 numeric_only=numeric_only,
-                needs_counts=True,
-                pre_processing=None,
-                post_processing=_postprocessing,
                 ddof=ddof,
                 how="std",
             )
@@ -3651,12 +3619,7 @@ def cummax(
     @final
     def _get_cythonized_result(
         self,
-        base_func: Callable,
-        cython_dtype: np.dtype,
         numeric_only: bool = False,
-        needs_counts: bool = False,
-        pre_processing=None,
-        post_processing=None,
         how: str = "any_all",
         **kwargs,
     ):
@@ -3665,27 +3628,8 @@ def _get_cythonized_result(
 
         Parameters
         ----------
-        base_func : callable, Cythonized function to be called
-        cython_dtype : np.dtype
-            Type of the array that will be modified by the Cython call.
         numeric_only : bool, default False
             Whether only numeric datatypes should be computed
-        needs_counts : bool, default False
-            Whether the counts should be a part of the Cython call
-        pre_processing : function, default None
-            Function to be applied to `values` prior to passing to Cython.
-            Function should return a tuple where the first element is the
-            values to be passed to Cython and the second element is an optional
-            type which the values should be converted to after being returned
-            by the Cython operation. This function is also responsible for
-            raising a TypeError if the values have an invalid type. Raises
-            if `needs_values` is False.
-        post_processing : function, default None
-            Function to be applied to result of Cython function. Should accept
-            an array of values as the first argument and type inferences as its
-            second argument, i.e. the signature should be
-            (ndarray, Type). If `needs_nullable=True`, a third argument should be
-            `nullable`, to allow for processing specific to nullable values.
         how : str, default any_all
             Determines if any/all cython interface or std interface is used.
         **kwargs : dict
@@ -3695,11 +3639,6 @@ def _get_cythonized_result(
         -------
         `Series` or `DataFrame`  with filled values
         """
-        if post_processing and not callable(post_processing):
-            raise ValueError("'post_processing' must be a callable!")
-        if pre_processing and not callable(pre_processing):
-            raise ValueError("'pre_processing' must be a callable!")
-
         grouper = self.grouper
 
         ids, _, ngroups = grouper.group_info
@@ -3718,44 +3657,12 @@ def blk_func(values: ArrayLike) -> ArrayLike:
                         skipna=kwargs["skipna"],
                         val_test=kwargs["val_test"],
                     )
+                else:
+                    raise NotImplementedError
 
-            values = values.T
-            ncols = 1 if values.ndim == 1 else values.shape[1]
-
-            result = np.zeros((ngroups, ncols), dtype=cython_dtype)
-
-            func = partial(base_func, out=result, labels=ids)
-
-            inferences = None
-
-            if needs_counts:
-                counts = np.zeros(ngroups, dtype=np.int64)
-                func = partial(func, counts=counts)
-
-            vals = values
-            if pre_processing:
-                vals, inferences = pre_processing(vals)
-
-            vals = vals.astype(cython_dtype, copy=False)
-            if vals.ndim == 1:
-                vals = vals.reshape((-1, 1))
-            func = partial(func, values=vals)
-
-            if how != "std":
-                mask = isna(values).view(np.uint8)
-                if mask.ndim == 1:
-                    mask = mask.reshape(-1, 1)
-                func = partial(func, mask=mask)
-                func = partial(func, nullable=False)
-
-            func(**kwargs)  # Call func to modify result in place
-
-            if values.ndim == 1:
-                assert result.shape[1] == 1, result.shape
-                result = result[:, 0]
-
-            result = post_processing(result, inferences, nullable=False)
-            return result.T
+            arr = PandasArray(values)
+            result = blk_func(arr)
+            return result
 
         obj = self._obj_with_exclusions
 

From ee0e8138d8a5748145d3dc7b696ae0cd57e36174 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 1 Feb 2023 18:02:35 -0800
Subject: [PATCH 4/6] mypy fixup

---
 pandas/core/arrays/base.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 3b2a73d493316..0b8f48788b6e5 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -1709,14 +1709,14 @@ def groupby_any_all(
         obj = self
         if is_object_dtype(self.dtype) and skipna:
             # GH#37501: don't raise on pd.NA when skipna=True
-            mask = self.isna()
-            if mask.any():
+            self_mask = self.isna()
+            if self_mask.any():
                 # mask on original values computed separately
                 obj = obj.copy()
-                obj[mask] = True
+                obj[self_mask] = True
 
         vals = obj.astype(bool, copy=False).view(np.int8)
-        mask = self.isna().view(np.uint8)
+        mask = np.asarray(self.isna()).view(np.uint8)
 
         # Call func to modify result in place
         libgroupby.group_any_all(

From 25f6802d2173e91935f827bff08f0a82bdda207f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 9 Feb 2023 14:21:34 -0800
Subject: [PATCH 5/6] remove leftover *1

---
 pandas/core/arrays/masked.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index d4a2147f05ce3..3d1c5102b2030 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -1405,7 +1405,7 @@ def groupby_any_all(
     ):
         from pandas.core.arrays import BooleanArray
 
-        result = np.zeros(ngroups * 1, dtype=np.int8).reshape(-1, 1)
+        result = np.zeros(ngroups, dtype=np.int8).reshape(-1, 1)
         vals = self._data.astype(bool, copy=False).view(np.int8).reshape(-1, 1)
         mask = self.isna().view(np.uint8).reshape(-1, 1)
 
@@ -1428,7 +1428,7 @@ def groupby_any_all(
     def groupby_std(self, *, ngroups: int, ids: npt.NDArray[np.intp], ddof: int):
         from pandas.core.arrays import FloatingArray
 
-        result = np.zeros(ngroups * 1, dtype=np.float64).reshape(-1, 1)
+        result = np.zeros(ngroups, dtype=np.float64).reshape(-1, 1)
         counts = np.zeros(ngroups, dtype=np.int64)
         vals = self._data.astype(np.float64, copy=False).reshape(-1, 1)
         mask = self.isna().view(np.uint8).reshape(-1, 1)

From 9844dea6d22ea292c3b05131fa36e97de22c7a7e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 1 Mar 2023 11:16:50 -0800
Subject: [PATCH 6/6] restore warnings import

---
 pandas/core/groupby/groupby.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4b80856e5c59c..d853bf4a21d0e 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -29,6 +29,7 @@ class providing the base-class of operations.
     cast,
     final,
 )
+import warnings
 
 import numpy as np
 
@@ -1902,7 +1903,6 @@ def std(
 
             return np.sqrt(self._numba_agg_general(sliding_var, engine_kwargs, ddof))
         else:
-
             result = self._get_cythonized_result(
                 numeric_only=numeric_only,
                 ddof=ddof,
@@ -3601,7 +3601,6 @@ def _get_cythonized_result(
         ids, _, ngroups = grouper.group_info
 
         def blk_func(values: ArrayLike) -> ArrayLike:
-
             if isinstance(values, ExtensionArray):
                 if how == "std":
                     return values._groupby_std(