Skip to content

Commit 1d70500

Browse files
authored
PERF: groupby(...).__len__ (#57595)
* PERF: groupby(...).__len__ * GH#57595
1 parent a730486 commit 1d70500

File tree

4 files changed

+26
-2
lines changed

4 files changed

+26
-2
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ Performance improvements
178178
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
179179
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
180180
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`?`)
181+
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
181182

182183
.. ---------------------------------------------------------------------------
183184
.. _whatsnew_300.bug_fixes:

pandas/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ def axis(request):
271271
return request.param
272272

273273

274-
@pytest.fixture(params=[True, False, None])
274+
@pytest.fixture(params=[True, False])
275275
def observed(request):
276276
"""
277277
Pass in the observed keyword to groupby for [True, False]

pandas/core/groupby/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
596596

597597
@final
598598
def __len__(self) -> int:
599-
return len(self.groups)
599+
return self._grouper.ngroups
600600

601601
@final
602602
def __repr__(self) -> str:

pandas/tests/groupby/test_groupby.py

+23
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,29 @@ def test_len_nan_group():
149149
assert len(df.groupby(["a", "b"])) == 0
150150

151151

152+
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
153+
def test_len_categorical(dropna, observed, keys):
154+
# GH#57595
155+
df = DataFrame(
156+
{
157+
"a": Categorical([1, 1, 2, np.nan], categories=[1, 2, 3]),
158+
"b": Categorical([1, 1, 2, np.nan], categories=[1, 2, 3]),
159+
"c": 1,
160+
}
161+
)
162+
gb = df.groupby(keys, observed=observed, dropna=dropna)
163+
result = len(gb)
164+
if observed and dropna:
165+
expected = 2
166+
elif observed and not dropna:
167+
expected = 3
168+
elif len(keys) == 1:
169+
expected = 3 if dropna else 4
170+
else:
171+
expected = 9 if dropna else 16
172+
assert result == expected, f"{result} vs {expected}"
173+
174+
152175
def test_basic_regression():
153176
# regression
154177
result = Series([1.0 * x for x in list(range(1, 10)) * 10])

0 commit comments

Comments
 (0)