diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index da712f84eb1b5..5f6483ede5544 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -381,6 +381,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`) - Bug in :func:`Series.groupby` where using ``groupby`` with a :class:`MultiIndex` Series with a list of labels equal to the length of the series caused incorrect grouping (:issue:`25704`) - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ec22548de6da3..8a6ec285cc79e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -246,7 +246,7 @@ def size(self): if ngroup: out = np.bincount(ids[ids != -1], minlength=ngroup) else: - out = ids + out = [] return Series(out, index=self.result_index, dtype='int64') diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 187fea5403aea..7ca483e12b332 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1090,6 +1090,15 @@ def test_size(df): tm.assert_series_equal(df.groupby('A').size(), out) +def test_size_groupby_all_null(): + # GH23050 + # Assert no 'ValueError: Length of passed 
values is 2, index implies 0' + df = DataFrame({'A': [None, None]}) # all-null groups + result = df.groupby('A').size() + expected = Series(dtype='int64', index=Index([], name='A')) + tm.assert_series_equal(result, expected) + + # quantile # -------------------------------- @pytest.mark.parametrize("interpolation", [