Skip to content

Commit b3a7292

Browse files
authored
REF: share GroupBy.count (#43519)
1 parent: dfbc76e · commit: b3a7292

File tree

2 files changed: +40 −65 lines changed

pandas/core/groupby/generic.py

+14 −62
Original file line number | Diff line number | Diff line change
@@ -26,10 +26,7 @@
2626

2727
import numpy as np
2828

29-
from pandas._libs import (
30-
lib,
31-
reduction as libreduction,
32-
)
29+
from pandas._libs import reduction as libreduction
3330
from pandas._typing import (
3431
ArrayLike,
3532
FrameOrSeries,
@@ -161,6 +158,19 @@ def pinner(cls):
161158
class SeriesGroupBy(GroupBy[Series]):
162159
_apply_allowlist = base.series_apply_allowlist
163160

161+
def _wrap_agged_manager(self, mgr: Manager2D) -> Series:
162+
single = mgr.iget(0)
163+
ser = self.obj._constructor(single, name=self.obj.name)
164+
ser.index = self.grouper.result_index
165+
return ser
166+
167+
def _get_data_to_aggregate(self) -> Manager2D:
168+
obj = self._obj_with_exclusions
169+
df = obj.to_frame()
170+
df.columns = [obj.name] # in case name is None, we need to overwrite [0]
171+
172+
return df._mgr
173+
164174
def _iterate_slices(self) -> Iterable[Series]:
165175
yield self._selected_obj
166176

@@ -768,30 +778,6 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:
768778
out = ensure_int64(out)
769779
return self.obj._constructor(out, index=mi, name=self.obj.name)
770780

771-
def count(self) -> Series:
772-
"""
773-
Compute count of group, excluding missing values.
774-
775-
Returns
776-
-------
777-
Series
778-
Count of values within each group.
779-
"""
780-
ids, _, ngroups = self.grouper.group_info
781-
val = self.obj._values
782-
783-
mask = (ids != -1) & ~isna(val)
784-
minlength = ngroups or 0
785-
out = np.bincount(ids[mask], minlength=minlength)
786-
787-
result = self.obj._constructor(
788-
out,
789-
index=self.grouper.result_index,
790-
name=self.obj.name,
791-
dtype="int64",
792-
)
793-
return self._reindex_output(result, fill_value=0)
794-
795781
@doc(Series.nlargest)
796782
def nlargest(self, n: int = 5, keep: str = "first"):
797783
f = partial(Series.nlargest, n=n, keep=keep)
@@ -1583,40 +1569,6 @@ def _apply_to_column_groupbys(self, func, obj: FrameOrSeries) -> DataFrame:
15831569
else:
15841570
return concat(results, keys=columns, axis=1)
15851571

1586-
def count(self) -> DataFrame:
1587-
"""
1588-
Compute count of group, excluding missing values.
1589-
1590-
Returns
1591-
-------
1592-
DataFrame
1593-
Count of values within each group.
1594-
"""
1595-
data = self._get_data_to_aggregate()
1596-
ids, _, ngroups = self.grouper.group_info
1597-
mask = ids != -1
1598-
1599-
def hfunc(bvalues: ArrayLike) -> ArrayLike:
1600-
# TODO(2DEA): reshape would not be necessary with 2D EAs
1601-
if bvalues.ndim == 1:
1602-
# EA
1603-
masked = mask & ~isna(bvalues).reshape(1, -1)
1604-
else:
1605-
masked = mask & ~isna(bvalues)
1606-
1607-
counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
1608-
return counted
1609-
1610-
new_mgr = data.grouped_reduce(hfunc)
1611-
1612-
# If we are grouping on categoricals we want unobserved categories to
1613-
# return zero, rather than the default of NaN which the reindexing in
1614-
# _wrap_agged_manager() returns. GH 35028
1615-
with com.temp_setattr(self, "observed", True):
1616-
result = self._wrap_agged_manager(new_mgr)
1617-
1618-
return self._reindex_output(result, fill_value=0)
1619-
16201572
def nunique(self, dropna: bool = True) -> DataFrame:
16211573
"""
16221574
Return DataFrame with counts of unique elements in each position.

pandas/core/groupby/groupby.py

+26 −3
Original file line number | Diff line number | Diff line change
@@ -1720,9 +1720,10 @@ def all(self, skipna: bool = True):
17201720
"""
17211721
return self._bool_agg("all", skipna)
17221722

1723+
@final
17231724
@Substitution(name="groupby")
17241725
@Appender(_common_see_also)
1725-
def count(self):
1726+
def count(self) -> Series | DataFrame:
17261727
"""
17271728
Compute count of group, excluding missing values.
17281729
@@ -1731,8 +1732,30 @@ def count(self):
17311732
Series or DataFrame
17321733
Count of values within each group.
17331734
"""
1734-
# defined here for API doc
1735-
raise NotImplementedError
1735+
data = self._get_data_to_aggregate()
1736+
ids, _, ngroups = self.grouper.group_info
1737+
mask = ids != -1
1738+
1739+
def hfunc(bvalues: ArrayLike) -> ArrayLike:
1740+
# TODO(2DEA): reshape would not be necessary with 2D EAs
1741+
if bvalues.ndim == 1:
1742+
# EA
1743+
masked = mask & ~isna(bvalues).reshape(1, -1)
1744+
else:
1745+
masked = mask & ~isna(bvalues)
1746+
1747+
counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
1748+
return counted
1749+
1750+
new_mgr = data.grouped_reduce(hfunc)
1751+
1752+
# If we are grouping on categoricals we want unobserved categories to
1753+
# return zero, rather than the default of NaN which the reindexing in
1754+
# _wrap_agged_manager() returns. GH 35028
1755+
with com.temp_setattr(self, "observed", True):
1756+
result = self._wrap_agged_manager(new_mgr)
1757+
1758+
return self._reindex_output(result, fill_value=0)
17361759

17371760
@final
17381761
@Substitution(name="groupby")

0 commit comments

Comments
 (0)