BUG: value_counts can handle the case even with empty groups (#28479) (#28634)

dongho-jung · jreback · commit 8efc717e4652 · 2019-11-07T16:19:57.000-05:00
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -455,6 +455,7 @@ Other
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`)
 - Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
 - :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
+- :meth:`SeriesGroupBy.value_counts` can now handle the case where the :class:`Grouper` produces empty groups (:issue:`28479`)
 - Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`)
 - Fix :class:`AbstractHolidayCalendar` to return correct results for
   years after 2030 (now goes up to 2200) (:issue:`27790`)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -767,6 +767,11 @@ def group_info(self):
             ngroups,
         )
 
+    @cache_readonly
+    def recons_codes(self):
+        # keep 0 (the first bin) plus every index i where bins[i] != bins[i - 1]
+        return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]]
+
     @cache_readonly
     def result_index(self):
         if len(self.binlabels) != 0 and isna(self.binlabels[0]):
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, MultiIndex, Series, date_range
+from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
 import pandas.util.testing as tm
 
 
@@ -79,3 +79,31 @@ def rebuild_index(df):
     # have to sort on index because of unstable sort on values
     left, right = map(rebuild_index, (left, right))  # xref GH9212
     tm.assert_series_equal(left.sort_index(), right.sort_index())
+
+
+def test_series_groupby_value_counts_with_grouper():
+    # GH28479: dropping row 3 leaves one daily bin empty under Grouper(freq="1D")
+    df = DataFrame(
+        {
+            "Timestamp": [
+                1565083561,
+                1565083561 + 86400,
+                1565083561 + 86500,
+                1565083561 + 86400 * 2,
+                1565083561 + 86400 * 3,
+                1565083561 + 86500 * 3,
+                1565083561 + 86400 * 4,
+            ],
+            "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"],
+        }
+    ).drop([3])
+
+    df["Datetime"] = to_datetime(df["Timestamp"].apply(lambda t: str(t)), unit="s")
+    dfg = df.groupby(Grouper(freq="1D", key="Datetime"))
+
+    # sort both by index, since the sort on values is unstable (xref GH9212)
+    result = dfg["Food"].value_counts().sort_index()
+    expected = dfg["Food"].apply(Series.value_counts).sort_index()
+    expected.index.names = result.index.names
+
+    tm.assert_series_equal(result, expected)