diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b40a64420a0be..2130e0c88ef4d 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -454,6 +454,7 @@ Other - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) +- :meth:`SeriesGroupBy.value_counts` now handles the case where the :class:`Grouper` produces empty groups (:issue:`28479`) - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) - Fix :class:`AbstractHolidayCalendar` to return correct results for years after 2030 (now goes up to 2200) (:issue:`27790`) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 2c8aa1294451d..9599ce0bf39a9 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -767,6 +767,11 @@ def group_info(self): ngroups, ) + @cache_readonly + def recons_codes(self): + # get unique result indices, and prepend 0 as groupby starts from the first + return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] + @cache_readonly def result_index(self): if len(self.binlabels) != 0 and isna(self.binlabels[0]): diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 363c5a9af0180..c76ee09f977b5 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series, date_range +from pandas import DataFrame, Grouper, MultiIndex, Series, 
date_range, to_datetime import pandas.util.testing as tm @@ -79,3 +79,31 @@ def rebuild_index(df): # have to sort on index because of unstable sort on values left, right = map(rebuild_index, (left, right)) # xref GH9212 tm.assert_series_equal(left.sort_index(), right.sort_index()) + + +def test_series_groupby_value_counts_with_grouper(): + # GH28479: value_counts must work when the Grouper yields an empty bin + df = DataFrame( + { + "Timestamp": [ + 1565083561, + 1565083561 + 86400, + 1565083561 + 86500, + 1565083561 + 86400 * 2, + 1565083561 + 86400 * 3, + 1565083561 + 86500 * 3, + 1565083561 + 86400 * 4, + ], + "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"], + } + ).drop([3]) + + # row 3 was the only entry of its day, so dropping it leaves one daily bin empty + # pass the integer epoch seconds directly; no string round-trip is needed + df["Datetime"] = to_datetime(df["Timestamp"], unit="s") + dfg = df.groupby(Grouper(freq="1D", key="Datetime")) + + # have to sort on index because of unstable sort on values xref GH9212 + result = dfg["Food"].value_counts().sort_index() + expected = dfg["Food"].apply(Series.value_counts).sort_index() + expected.index.names = result.index.names + + tm.assert_series_equal(result, expected)