From f0ce12cf553a49565b7d2879472a8eae88706cfa Mon Sep 17 00:00:00 2001 From: Adam Hooper Date: Tue, 16 Apr 2019 17:48:13 -0400 Subject: [PATCH 1/2] BUG: GroupBy.size with all-null data raises ValueError --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/groupby/ops.py | 2 +- pandas/tests/groupby/test_function.py | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index da712f84eb1b5..5f6483ede5544 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -381,6 +381,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`) - Bug in :func:`Series.groupby` where using ``groupby`` with a :class:`MultiIndex` Series with a list of labels equal to the length of the series caused incorrect grouping (:issue:`25704`) - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ec22548de6da3..8a6ec285cc79e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -246,7 +246,7 @@ def size(self): if ngroup: out = np.bincount(ids[ids != -1], minlength=ngroup) else: - out = ids + out = [] return Series(out, index=self.result_index, dtype='int64') diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 187fea5403aea..2909f8e840029 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1089,6 +1089,12 @@ def test_size(df): out = Series(dtype='int64', index=Index([], name='A')) tm.assert_series_equal(df.groupby('A').size(), out) + # GH23050 + # Assert no 'Value Error : Length of passed values is 2, index implies 0' + df = DataFrame({'A': [None, None]}) # all-null groups + out = Series(dtype='int64', index=Index([], name='A')) + tm.assert_series_equal(df.groupby('A').size(), out) + # quantile # -------------------------------- From 3f9f66138c858e3ad6bf2d49624384039dcdbbec Mon Sep 17 00:00:00 2001 From: Adam Hooper Date: Tue, 16 Apr 2019 22:21:58 -0400 Subject: [PATCH 2/2] Move bug-23050 test to separate function --- pandas/tests/groupby/test_function.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 2909f8e840029..7ca483e12b332 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1089,11 +1089,14 @@ def test_size(df): out = Series(dtype='int64', index=Index([], name='A')) tm.assert_series_equal(df.groupby('A').size(), out) + +def test_size_groupby_all_null(): # GH23050 # Assert no 'Value Error : Length of passed values is 2, index implies 0' df = DataFrame({'A': [None, None]}) # all-null groups - out = Series(dtype='int64', index=Index([], name='A')) - tm.assert_series_equal(df.groupby('A').size(), out) + result = df.groupby('A').size() + expected = Series(dtype='int64', index=Index([], name='A')) + tm.assert_series_equal(result, expected) # quantile