Skip to content

Commit b28456a

Browse files
committed
BUG: value_counts can handle the case even with empty groups (pandas-dev#28479)
* If applying rep to recons_labels fails, fall back to ids with consecutive duplicates removed.
1 parent 4fb853f commit b28456a

File tree

3 files changed

+35
-2
lines changed

3 files changed

+35
-2
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ Other
299299
- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`)
300300
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`)
301301
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
302+
- :meth:`SeriesGroupBy.value_counts` now handles the case where the :class:`Grouper` produces empty groups (:issue:`28479`)
302303

303304
.. _whatsnew_1000.contributors:
304305

pandas/core/groupby/generic.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1259,7 +1259,14 @@ def value_counts(
12591259
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
12601260

12611261
# multi-index components
1262-
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
1262+
try:
1263+
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
1264+
except ValueError:
1265+
# If applying rep to recons_labels fails, use ids which have no
1266+
# consecutive duplicates instead.
1267+
_ids_idx = np.ones(len(ids), dtype=bool)
1268+
_ids_idx[1:] = ids[1:] != ids[:-1]
1269+
labels = list(map(rep, [ids[_ids_idx]])) + [llab(lab, inc)]
12631270
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
12641271
names = self.grouper.names + [self._selection_name]
12651272

pandas/tests/groupby/test_value_counts.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas import DataFrame, MultiIndex, Series, date_range
12+
from pandas import DataFrame, MultiIndex, Series, date_range, Grouper
1313
from pandas.util import testing as tm
1414

1515

@@ -79,3 +79,28 @@ def rebuild_index(df):
7979
# have to sort on index because of unstable sort on values
8080
left, right = map(rebuild_index, (left, right)) # xref GH9212
8181
tm.assert_series_equal(left.sort_index(), right.sort_index())
82+
83+
84+
@pytest.mark.parametrize(
85+
"freq, size, frac", product(["1D", "2D", "1W", "1Y"], [100, 1000], [0.1, 0.5, 1])
86+
)
87+
def test_series_groupby_value_counts_with_grouper(freq, size, frac):
88+
np.random.seed(42)
89+
90+
df = DataFrame.from_dict(
91+
{
92+
"date": date_range("2019-09-25", periods=size),
93+
"name": np.random.choice(list("abcd"), size),
94+
}
95+
).sample(frac=frac)
96+
97+
gr = df.groupby(Grouper(key="date", freq=freq))["name"]
98+
99+
# have to sort on index because of unstable sort on values xref GH9212
100+
result = gr.value_counts().sort_index()
101+
expected = gr.apply(Series.value_counts).sort_index()
102+
expected.index.names = (
103+
result.index.names
104+
) # .apply(Series.value_counts) can't create all names
105+
106+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)