Skip to content

Commit 9a21c3c

Browse files
authored
PERF: Groupby.count (pandas-dev#43694)
1 parent b490507 commit 9a21c3c

File tree

2 files changed

+12
-9
lines changed

2 files changed

+12
-9
lines changed

pandas/core/groupby/generic.py

+8 -6
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@
8080
Index,
8181
MultiIndex,
8282
all_indexes_same,
83+
default_index,
8384
)
8485
from pandas.core.series import Series
8586
from pandas.core.util.numba_ import maybe_use_numba
@@ -161,15 +162,16 @@ class SeriesGroupBy(GroupBy[Series]):
161162
def _wrap_agged_manager(self, mgr: Manager2D) -> Series:
162163
single = mgr.iget(0)
163164
ser = self.obj._constructor(single, name=self.obj.name)
164-
ser.index = self.grouper.result_index
165+
# NB: caller is responsible for setting ser.index
165166
return ser
166167

167168
def _get_data_to_aggregate(self) -> Manager2D:
168-
obj = self._obj_with_exclusions
169-
df = obj.to_frame()
170-
df.columns = [obj.name] # in case name is None, we need to overwrite [0]
171-
172-
return df._mgr
169+
ser = self._obj_with_exclusions
170+
single = ser._mgr
171+
columns = default_index(1)
172+
# Much faster than using ser.to_frame() since we avoid inferring columns
173+
# from scalar
174+
return single.to_2d_mgr(columns)
173175

174176
def _iterate_slices(self) -> Iterable[Series]:
175177
yield self._selected_obj

pandas/core/groupby/groupby.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -1763,6 +1763,8 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
17631763
# _wrap_agged_manager() returns. GH 35028
17641764
with com.temp_setattr(self, "observed", True):
17651765
result = self._wrap_agged_manager(new_mgr)
1766+
if result.ndim == 1:
1767+
result.index = self.grouper.result_index
17661768

17671769
return self._reindex_output(result, fill_value=0)
17681770

@@ -2715,9 +2717,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
27152717
mgr.grouped_reduce(blk_func, ignore_failures=False)
27162718

27172719
if is_ser:
2718-
res = obj._constructor_expanddim(res_mgr)
2719-
res = res[res.columns[0]] # aka res.squeeze()
2720-
res.name = obj.name
2720+
res = self._wrap_agged_manager(res_mgr)
27212721
else:
27222722
res = obj._constructor(res_mgr)
27232723

@@ -3164,6 +3164,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
31643164

31653165
if orig_ndim == 1:
31663166
out = self._wrap_agged_manager(res_mgr)
3167+
out.index = self.grouper.result_index
31673168
else:
31683169
out = type(obj)(res_mgr)
31693170

0 commit comments

Comments
 (0)