Skip to content

Commit 2b1808b

Browse files
committed
BUG: value_counts can handle the case even with empty groups (pandas-dev#28479)
* If applying rep to recons_labels fails, fall back to ids with consecutive duplicates removed.
1 parent 3f0e816 commit 2b1808b

File tree

3 files changed

+35
-2
lines changed

3 files changed

+35
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ Other
243243
- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`)
244244
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`)
245245
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
246+
- :meth:`SeriesGroupBy.value_counts` now handles the case where the :class:`Grouper` produces empty groups (:issue:`28479`)
246247

247248
.. _whatsnew_1000.contributors:
248249

pandas/core/groupby/generic.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1266,7 +1266,14 @@ def value_counts(
12661266
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
12671267

12681268
# multi-index components
1269-
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
1269+
try:
1270+
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
1271+
except ValueError:
1272+
# If applying rep to recons_labels fails, use ids which have no
1273+
# consecutive duplicates instead.
1274+
_ids_idx = np.ones(len(ids), dtype=bool)
1275+
_ids_idx[1:] = ids[1:] != ids[:-1]
1276+
labels = list(map(rep, [ids[_ids_idx]])) + [llab(lab, inc)]
12701277
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
12711278
names = self.grouper.names + [self._selection_name]
12721279

pandas/tests/groupby/test_value_counts.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas import DataFrame, MultiIndex, Series, date_range
12+
from pandas import DataFrame, MultiIndex, Series, date_range, Grouper
1313
from pandas.util import testing as tm
1414

1515

@@ -78,3 +78,28 @@ def rebuild_index(df):
7878
# have to sort on index because of unstable sort on values
7979
left, right = map(rebuild_index, (left, right)) # xref GH9212
8080
tm.assert_series_equal(left.sort_index(), right.sort_index())
81+
82+
83+
@pytest.mark.parametrize(
84+
"freq, size, frac", product(["1D", "2D", "1W", "1Y"], [100, 1000], [0.1, 0.5, 1])
85+
)
86+
def test_series_groupby_value_counts_with_grouper(freq, size, frac):
87+
np.random.seed(42)
88+
89+
df = DataFrame.from_dict(
90+
{
91+
"date": date_range("2019-09-25", periods=size),
92+
"name": np.random.choice(list("abcd"), size),
93+
}
94+
).sample(frac=frac)
95+
96+
gr = df.groupby(Grouper(key="date", freq=freq))["name"]
97+
98+
# have to sort on index because of unstable sort on values xref GH9212
99+
result = gr.value_counts().sort_index()
100+
expected = gr.apply(Series.value_counts).sort_index()
101+
expected.index.names = (
102+
result.index.names
103+
) # .apply(Series.value_counts) can't create all names
104+
105+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)