Skip to content

Commit 175a33d

Browse files
committed
BUG: SeriesGroupBy.value_counts now handles groupers that produce empty groups (pandas-dev#28479)
* If applying rep to recons_labels fails, use ids with consecutive duplicates removed instead.
1 parent 3f0e816 commit 175a33d

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ Other
243243
- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`)
244244
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`)
245245
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
246+
- :meth:`SeriesGroupBy.value_counts` now handles the case where the :class:`Grouper` produces empty groups (:issue:`28479`).
246247

247248
.. _whatsnew_1000.contributors:
248249

pandas/core/groupby/generic.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -1264,9 +1264,16 @@ def value_counts(
12641264

12651265
# num. of times each group should be repeated
12661266
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
1267-
1268-
# multi-index components
1269-
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
1267+
1268+
# multi-index components
1269+
try:
1270+
labels = list(map(rep, self.grouper.recons_labels )) + [llab(lab, inc)]
1271+
except ValueError:
1272+
# If applying rep to recons_labels fails, use ids with consecutive
1273+
# duplicates removed instead.
1274+
_ids_idx = np.ones(len(ids), dtype=bool)
1275+
_ids_idx[1:] = ids[1:] != ids[:-1]
1276+
labels = list(map(rep, [ids[_ids_idx]])) + [llab(lab, inc)]
12701277
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
12711278
names = self.grouper.names + [self._selection_name]
12721279

pandas/tests/groupby/test_value_counts.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas import DataFrame, MultiIndex, Series, date_range
12+
from pandas import DataFrame, MultiIndex, Series, date_range, Grouper
1313
from pandas.util import testing as tm
1414

1515

@@ -78,3 +78,21 @@ def rebuild_index(df):
7878
# have to sort on index because of unstable sort on values
7979
left, right = map(rebuild_index, (left, right)) # xref GH9212
8080
tm.assert_series_equal(left.sort_index(), right.sort_index())
81+
82+
@pytest.mark.parametrize('freq, size, frac', product(['1D', '2D', '1W', '1Y'], [100, 1000], [0.1, 0.5, 1]))
83+
def test_series_groupby_value_counts_with_grouper(freq, size, frac):
84+
np.random.seed(42)
85+
86+
df = DataFrame.from_dict({'date': date_range('2019-09-25', periods=size),
87+
'name': np.random.choice(list('abcd'), size)
88+
}).sample(frac=frac)
89+
90+
gr = df.groupby(Grouper(key='date', freq=freq))['name']
91+
92+
# have to sort on index because of unstable sort on values (xref GH9212)
93+
result = gr.value_counts().sort_index()
94+
expected = gr.apply(Series.value_counts).sort_index()
95+
expected.index.names = result.index.names # .apply(Series.value_counts) can't create all names
96+
97+
tm.assert_series_equal(result, expected)
98+

0 commit comments

Comments
 (0)