diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 926e191c96754..497ee2a8dbb4d 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -106,6 +106,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``GroupBy.size`` when data-frame is empty. (:issue:`11699`) - Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 456fedb272e18..462ead70c9f93 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1095,7 +1095,8 @@ def value_counts(self, dropna=True): ix, clean = np.arange(ncat), mask.all() if dropna or clean: - count = bincount(code if clean else code[mask], minlength=ncat) + obs = code if clean else code[mask] + count = bincount(obs, minlength=ncat or None) else: count = bincount(np.where(mask, code, ncat)) ix = np.append(ix, -1) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index e9aa9066b75a5..584b946d47618 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1439,7 +1439,7 @@ def size(self): """ ids, _, ngroup = self.group_info ids = com._ensure_platform_int(ids) - out = np.bincount(ids[ids != -1], minlength=ngroup) + out = np.bincount(ids[ids != -1], minlength=ngroup or None) return Series(out, index=self.result_index, dtype='int64') @cache_readonly @@ -2822,7 +2822,7 @@ def count(self): mask = (ids != -1) & ~isnull(val) ids = com._ensure_platform_int(ids) - out = np.bincount(ids[mask], minlength=ngroups) if ngroups != 0 else [] + out = np.bincount(ids[mask], minlength=ngroups or None) return Series(out, index=self.grouper.result_index, name=self.name, dtype='int64') diff --git a/pandas/core/series.py b/pandas/core/series.py index e603c6aa75d6f..f3e059b3d6e98 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1142,7 +1142,8 @@ def count(self, level=None): lab[mask] = cnt = len(lev) lev = lev.insert(cnt, _get_na_value(lev.dtype.type)) - out = np.bincount(lab[notnull(self.values)], minlength=len(lev)) + obs = lab[notnull(self.values)] + out = np.bincount(obs, minlength=len(lev) or None) return self._constructor(out, index=lev, dtype='int64').__finalize__(self) def mode(self): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 025ed17194979..c3b9aee57c0de 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2525,6 +2525,11 @@ def test_size(self): right = df.groupby(key, sort=sort)['c'].apply(lambda a: a.shape[0]) assert_series_equal(left, right, check_names=False) + # GH11699 + df = DataFrame([], columns=['A', 'B']) + out = Series([], dtype='int64', index=Index([], name='A')) + assert_series_equal(df.groupby('A').size(), out) + def test_count(self): from string import ascii_lowercase n = 1 << 15