REF: share GroupBy.count (#43519)

jbrockmendel · web-flow · commit b3a7292e7f9a · 2021-09-12T10:54:26.000-04:00
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -26,10 +26,7 @@
 
 import numpy as np
 
-from pandas._libs import (
-    lib,
-    reduction as libreduction,
-)
+from pandas._libs import reduction as libreduction
 from pandas._typing import (
     ArrayLike,
     FrameOrSeries,
@@ -161,6 +158,19 @@ def pinner(cls):
 class SeriesGroupBy(GroupBy[Series]):
     _apply_allowlist = base.series_apply_allowlist
 
+    def _wrap_agged_manager(self, mgr: Manager2D) -> Series:
+        single = mgr.iget(0)
+        ser = self.obj._constructor(single, name=self.obj.name)
+        ser.index = self.grouper.result_index
+        return ser
+
+    def _get_data_to_aggregate(self) -> Manager2D:
+        obj = self._obj_with_exclusions
+        df = obj.to_frame()
+        df.columns = [obj.name]  # in case name is None, we need to overwrite [0]
+
+        return df._mgr
+
     def _iterate_slices(self) -> Iterable[Series]:
         yield self._selected_obj
 
@@ -768,30 +778,6 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:
             out = ensure_int64(out)
         return self.obj._constructor(out, index=mi, name=self.obj.name)
 
-    def count(self) -> Series:
-        """
-        Compute count of group, excluding missing values.
-
-        Returns
-        -------
-        Series
-            Count of values within each group.
-        """
-        ids, _, ngroups = self.grouper.group_info
-        val = self.obj._values
-
-        mask = (ids != -1) & ~isna(val)
-        minlength = ngroups or 0
-        out = np.bincount(ids[mask], minlength=minlength)
-
-        result = self.obj._constructor(
-            out,
-            index=self.grouper.result_index,
-            name=self.obj.name,
-            dtype="int64",
-        )
-        return self._reindex_output(result, fill_value=0)
-
     @doc(Series.nlargest)
     def nlargest(self, n: int = 5, keep: str = "first"):
         f = partial(Series.nlargest, n=n, keep=keep)
@@ -1583,40 +1569,6 @@ def _apply_to_column_groupbys(self, func, obj: FrameOrSeries) -> DataFrame:
         else:
             return concat(results, keys=columns, axis=1)
 
-    def count(self) -> DataFrame:
-        """
-        Compute count of group, excluding missing values.
-
-        Returns
-        -------
-        DataFrame
-            Count of values within each group.
-        """
-        data = self._get_data_to_aggregate()
-        ids, _, ngroups = self.grouper.group_info
-        mask = ids != -1
-
-        def hfunc(bvalues: ArrayLike) -> ArrayLike:
-            # TODO(2DEA): reshape would not be necessary with 2D EAs
-            if bvalues.ndim == 1:
-                # EA
-                masked = mask & ~isna(bvalues).reshape(1, -1)
-            else:
-                masked = mask & ~isna(bvalues)
-
-            counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
-            return counted
-
-        new_mgr = data.grouped_reduce(hfunc)
-
-        # If we are grouping on categoricals we want unobserved categories to
-        # return zero, rather than the default of NaN which the reindexing in
-        # _wrap_agged_manager() returns. GH 35028
-        with com.temp_setattr(self, "observed", True):
-            result = self._wrap_agged_manager(new_mgr)
-
-        return self._reindex_output(result, fill_value=0)
-
     def nunique(self, dropna: bool = True) -> DataFrame:
         """
         Return DataFrame with counts of unique elements in each position.
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1720,9 +1720,10 @@ def all(self, skipna: bool = True):
         """
         return self._bool_agg("all", skipna)
 
+    @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def count(self):
+    def count(self) -> Series | DataFrame:
         """
         Compute count of group, excluding missing values.
 
@@ -1731,8 +1732,30 @@ def count(self):
         Series or DataFrame
             Count of values within each group.
         """
-        # defined here for API doc
-        raise NotImplementedError
+        data = self._get_data_to_aggregate()
+        ids, _, ngroups = self.grouper.group_info
+        mask = ids != -1
+
+        def hfunc(bvalues: ArrayLike) -> ArrayLike:
+            # TODO(2DEA): reshape would not be necessary with 2D EAs
+            if bvalues.ndim == 1:
+                # EA
+                masked = mask & ~isna(bvalues).reshape(1, -1)
+            else:
+                masked = mask & ~isna(bvalues)
+
+            counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
+            return counted
+
+        new_mgr = data.grouped_reduce(hfunc)
+
+        # If we are grouping on categoricals we want unobserved categories to
+        # return zero, rather than the default of NaN which the reindexing in
+        # _wrap_agged_manager() returns. GH 35028
+        with com.temp_setattr(self, "observed", True):
+            result = self._wrap_agged_manager(new_mgr)
+
+        return self._reindex_output(result, fill_value=0)
 
     @final
     @Substitution(name="groupby")