From 2fe943a22580e40dc071304257bb87e253568d4b Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 17 Jul 2018 23:33:52 +0100 Subject: [PATCH 1/2] fix bug where np.bincount default arg minlength must be None for np<1.13 --- doc/source/whatsnew/v0.23.4.txt | 1 + pandas/core/groupby/generic.py | 6 +++++- pandas/tests/groupby/test_counting.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index 69525aaea1d62..f1aed02ba5183 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -32,6 +32,7 @@ Bug Fixes - Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) - Bug in ``roll_quantile`` caused a memory leak when calling ``.rolling(...).quantile(q)`` with ``q`` in (0,1) (:issue:`21965`) +- Bug in :func:`pandas.core.groupby.SeriesGroupBy.count` when using numpy < 1.13 and ngroups=0 (:issue:`21956`). 
- **Conversion** diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index fdededc325b03..2448777956246 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -46,6 +46,7 @@ from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.arrays.categorical import Categorical from pandas.core.internals import BlockManager, make_block +from pandas.compat.numpy import _np_version_under1p13 from pandas.plotting._core import boxplot_frame_groupby @@ -1207,7 +1208,10 @@ def count(self): mask = (ids != -1) & ~isna(val) ids = ensure_platform_int(ids) - out = np.bincount(ids[mask], minlength=ngroups or 0) + minlength = ngroups or 0 + if _np_version_under1p13 and minlength == 0: + minlength = None + out = np.bincount(ids[mask], minlength=minlength) return Series(out, index=self.grouper.result_index, diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 787d99086873e..a14b6ff014f37 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -212,3 +212,13 @@ def test_count_with_datetimelike(self, datetimelike): expected = DataFrame({'y': [2, 1]}, index=['a', 'b']) expected.index.name = "x" assert_frame_equal(expected, res) + + def test_count_with_only_nans_in_first_group(self): + # GH21956 + df = DataFrame({'A': [np.nan, np.nan], 'B': ['a', 'b'], 'C': [1, 2]}) + result = df.groupby(['A', 'B']).C.count() + mi = MultiIndex(levels=[[], ['a', 'b']], + labels=[[], []], + names=['A', 'B']) + expected = Series([], index=mi, dtype=np.int64, name='C') + assert_series_equal(result, expected, check_index_type=False) From 32a5fcfda0d658fcad6c2e0c08b32670001949a4 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 18 Jul 2018 00:30:50 +0100 Subject: [PATCH 2/2] changes according to comments --- doc/source/whatsnew/v0.23.4.txt | 1 - doc/source/whatsnew/v0.24.0.txt | 6 +++--- pandas/core/groupby/generic.py | 6 +----- 3 files changed, 4 
insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index f1aed02ba5183..69525aaea1d62 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -32,7 +32,6 @@ Bug Fixes - Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) - Bug in ``roll_quantile`` caused a memory leak when calling ``.rolling(...).quantile(q)`` with ``q`` in (0,1) (:issue:`21965`) -- Bug in :func:`pandas.core.groupby.SeriesGroupBy.count` when using numpy < 1.13 and ngroups=0 (:issue:`21956`). - **Conversion** diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1ac6d075946dd..37c7e9267b39a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -536,11 +536,11 @@ Groupby/Resample/Rolling - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`) - Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) -- -- - +- Bug where ``ValueError`` is wrongly raised when calling the :func:`~pandas.core.groupby.SeriesGroupBy.count` method of a + ``SeriesGroupBy`` when the grouping variable contains only NaNs and the numpy version is < 1.13 (:issue:`21956`). - Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a datetime-like index leading to incorrect results and also segfault. 
(:issue:`21704`) +- Sparse ^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 2448777956246..4c87f6122b956 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -46,7 +46,6 @@ from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.arrays.categorical import Categorical from pandas.core.internals import BlockManager, make_block -from pandas.compat.numpy import _np_version_under1p13 from pandas.plotting._core import boxplot_frame_groupby @@ -1208,10 +1207,7 @@ def count(self): mask = (ids != -1) & ~isna(val) ids = ensure_platform_int(ids) - minlength = ngroups or 0 - if _np_version_under1p13 and minlength == 0: - minlength = None - out = np.bincount(ids[mask], minlength=minlength) + out = np.bincount(ids[mask], minlength=ngroups or None) return Series(out, index=self.grouper.result_index,