diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1ac6d075946dd..37c7e9267b39a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -536,11 +536,11 @@ Groupby/Resample/Rolling - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`) - Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) -- -- - +- Bug where ``ValueError`` is wrongly raised when calling :func:`~pandas.core.groupby.SeriesGroupBy.count` method of a + ``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`). - Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a datetime-like index leading to incorrect results and also segfault. (:issue:`21704`) +- Sparse ^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index fdededc325b03..4c87f6122b956 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1207,7 +1207,7 @@ def count(self): mask = (ids != -1) & ~isna(val) ids = ensure_platform_int(ids) - out = np.bincount(ids[mask], minlength=ngroups or 0) + out = np.bincount(ids[mask], minlength=ngroups or None) return Series(out, index=self.grouper.result_index, diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 787d99086873e..a14b6ff014f37 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -212,3 +212,13 @@ def test_count_with_datetimelike(self, datetimelike): expected = DataFrame({'y': [2, 1]}, index=['a', 'b']) expected.index.name = "x" assert_frame_equal(expected, res) + + def test_count_with_only_nans_in_first_group(self): + # GH21956 + df = DataFrame({'A': [np.nan, np.nan], 'B': ['a', 'b'], 'C': [1, 2]}) + result = df.groupby(['A', 'B']).C.count() + mi = MultiIndex(levels=[[], ['a', 'b']], + labels=[[], []], + names=['A', 'B']) + expected = Series([], index=mi, dtype=np.int64, name='C') + assert_series_equal(result, expected, check_index_type=False)