Skip to content

Commit 43d8e34

Browse files
committed
PERF: SeriesGroupBy.value_counts no longer relies on apply with categorical
1 parent 8bc0832 commit 43d8e34

File tree

2 files changed

+28
-4
lines changed

2 files changed

+28
-4
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,7 @@ Performance improvements
468468
- Performance improvement when setting values in a pyarrow backed string array (:issue:`46400`)
469469
- Performance improvement in :func:`factorize` (:issue:`46109`)
470470
- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
471+
- Performance improvement in :meth:`SeriesGroupBy.value_counts` with categorical values (:issue:`46202`)
471472

472473
.. ---------------------------------------------------------------------------
473474
.. _whatsnew_150.bug_fixes:

pandas/core/groupby/generic.py

+27-4
Original file line numberDiff line numberDiff line change
@@ -609,12 +609,35 @@ def value_counts(
609609

610610
names = self.grouper.names + [self.obj.name]
611611

612-
if is_categorical_dtype(val.dtype) or (
613-
bins is not None and not np.iterable(bins)
614-
):
612+
if is_categorical_dtype(val.dtype):
613+
df = self.obj.to_frame()
614+
df.columns = [self.obj.name]
615+
# GH38672 relates to categorical dtype
616+
groupby = DataFrameGroupBy(
617+
df,
618+
self.grouper,
619+
axis=self.axis,
620+
level=self.level,
621+
grouper=self.grouper,
622+
exclusions=self.exclusions,
623+
as_index=self.as_index,
624+
sort=self.sort,
625+
group_keys=self.group_keys,
626+
squeeze=self.squeeze,
627+
observed=self.observed,
628+
mutated=self.mutated,
629+
dropna=self.dropna,
630+
)
631+
ser = groupby.value_counts(
632+
normalize=normalize, sort=sort, ascending=ascending
633+
)
634+
ser.name = self.obj.name
635+
return ser
636+
637+
if bins is not None and not np.iterable(bins):
615638
# scalar bins cannot be done at top level
616639
# in a backward compatible way
617-
# GH38672 relates to categorical dtype
640+
618641
ser = self.apply(
619642
Series.value_counts,
620643
normalize=normalize,

0 commit comments

Comments
 (0)