From 14eb32567cc89891e7e5c3e91a1fc3c0e1b1da34 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sat, 27 Apr 2019 18:28:23 -0400 Subject: [PATCH 01/16] BUG: Fix #10355, std() groupby calculation --- pandas/core/groupby/groupby.py | 5 +++-- pandas/tests/groupby/test_groupby.py | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dc0d44ef9ef5c..3cff02e424504 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1187,8 +1187,9 @@ def std(self, ddof=1, *args, **kwargs): """ # TODO: implement at Cython level? - nv.validate_groupby_func('std', args, kwargs) - return np.sqrt(self.var(ddof=ddof, **kwargs)) + with _group_selection_context(self): + f = lambda x: x.std(axis=self.axis, **kwargs) + return self._python_agg_general(f) @Substitution(name='groupby') @Appender(_common_see_also) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4481f1fbb2a03..8971b01034daf 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -26,6 +26,31 @@ def test_repr(): assert result == expected +def test_groupby_std(): + # GH10355 + df = pd.DataFrame({ + 'a': [1, 1, 1, 2, 2, 2, 3, 3, 3], + 'b': [1, 2, 3, 3, 5, 7, 7, 8, 9], + }) + result = df.groupby('a', as_index=False).std() + expected = pd.DataFrame({ + 'a': [1, 2, 3], + 'b': [1, 2, 1] + }) + assert_frame_equal(result, expected) + + df = pd.DataFrame({ + 'a': [1, 1, 1, 2, 2, 2, 3, 3, 3], + 'b': [1, 2, 3, 3, 5, 7, 7, 8, 9], + }) + result = df.groupby('a', as_index=True).std() + expected = pd.DataFrame({ + 'a': [1, 2, 3], + 'b': [1, 2, 1] + }).set_index('a') + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('dtype', ['int64', 'int32', 'float64', 'float32']) def test_basic(dtype): From 281ae55bb40946181377843aa73561e16c6e290f Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sat, 27 Apr 2019 18:37:45 -0400 
Subject: [PATCH 02/16] Add whatsnew note --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2784b9299e447..3e60b7ecf8550 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -399,6 +399,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`) +- Bug in :meth:`pandas.core.groupby.GroupBy.std` that computed standard deviation without respecting groupby context when `as_index=False` (:issue:`10355`) Reshaping From 0b457efb757bcc334d1f037a0aca78084a5f0c0a Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sat, 27 Apr 2019 19:46:18 -0400 Subject: [PATCH 03/16] Pass ddof to std, remove axis --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3cff02e424504..3419ca43c8764 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1188,7 +1188,7 @@ def std(self, ddof=1, *args, **kwargs): # TODO: implement at Cython level? 
with _group_selection_context(self): - f = lambda x: x.std(axis=self.axis, **kwargs) + f = lambda x: x.std(ddof=ddof, **kwargs) return self._python_agg_general(f) @Substitution(name='groupby') From 17b5aaf753bcc797091b2bd2564207384ba814ce Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sat, 27 Apr 2019 19:57:12 -0400 Subject: [PATCH 04/16] Add back validation --- pandas/core/groupby/groupby.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3419ca43c8764..0e09a16747b18 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1187,6 +1187,7 @@ def std(self, ddof=1, *args, **kwargs): """ # TODO: implement at Cython level? + nv.validate_groupby_func('std', args, kwargs) with _group_selection_context(self): f = lambda x: x.std(ddof=ddof, **kwargs) return self._python_agg_general(f) From 85b1639dd67c5e86fb5217d61fbea3a989b0868c Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sat, 27 Apr 2019 20:55:20 -0400 Subject: [PATCH 05/16] Handle IndexError in _python_agg_general --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0e09a16747b18..bc564488151f9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -867,7 +867,7 @@ def _python_agg_general(self, func, *args, **kwargs): try: result, counts = self.grouper.agg_series(obj, f) output[name] = self._try_cast(result, obj, numeric_only=True) - except TypeError: + except (IndexError, TypeError): continue if len(output) == 0: From a0c3c3eda592f51652e412427483b4f721530432 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sun, 28 Apr 2019 13:42:01 -0400 Subject: [PATCH 06/16] Make requested changes to tests --- pandas/tests/groupby/test_function.py | 19 +++++++++++++++++++ pandas/tests/groupby/test_groupby.py | 25 ------------------------- 2 files changed, 19 insertions(+), 25 deletions(-) 
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 3d9bfcd126377..fbda7d53ea93b 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -490,6 +490,25 @@ def test_ops_general(): raise +@pytest.mark.parametrize('as_index', [True, False]) +def test_groupby_std(as_index): + # GH 10355: Test that std does not affect the groupby column + df = pd.DataFrame({ + 'a': [1, 1, 1, 2, 2, 2, 3, 3, 3], + 'b': [1, 2, 3, 3, 5, 7, 7, 8, 9], + }) + result = df.groupby('a', as_index=as_index).std() + expected = pd.DataFrame({ + 'a': [1, 2, 3], + 'b': [1, 2, 1] + }) + + if as_index: + expected = expected.set_index('a') + + tm.assert_frame_equal(result, expected) + + def test_max_nan_bug(): raw = """,Date,app,File -04-23,2013-04-23 00:00:00,,log080001.log diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8971b01034daf..4481f1fbb2a03 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -26,31 +26,6 @@ def test_repr(): assert result == expected -def test_groupby_std(): - # GH10355 - df = pd.DataFrame({ - 'a': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'b': [1, 2, 3, 3, 5, 7, 7, 8, 9], - }) - result = df.groupby('a', as_index=False).std() - expected = pd.DataFrame({ - 'a': [1, 2, 3], - 'b': [1, 2, 1] - }) - assert_frame_equal(result, expected) - - df = pd.DataFrame({ - 'a': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'b': [1, 2, 3, 3, 5, 7, 7, 8, 9], - }) - result = df.groupby('a', as_index=True).std() - expected = pd.DataFrame({ - 'a': [1, 2, 3], - 'b': [1, 2, 1] - }).set_index('a') - assert_frame_equal(result, expected) - - @pytest.mark.parametrize('dtype', ['int64', 'int32', 'float64', 'float32']) def test_basic(dtype): From 575a9cad4838b569caee94efb8896093172d1f01 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sun, 28 Apr 2019 13:49:24 -0400 Subject: [PATCH 07/16] Add cython version of groupby std --- pandas/core/groupby/groupby.py | 14 
++++++++++---- pandas/core/groupby/ops.py | 4 ++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bc564488151f9..418fdadad1263 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1185,12 +1185,18 @@ def std(self, ddof=1, *args, **kwargs): ddof : integer, default 1 degrees of freedom """ - - # TODO: implement at Cython level? nv.validate_groupby_func('std', args, kwargs) - with _group_selection_context(self): + if ddof == 1: + try: + return self._cython_agg_general('std', **kwargs) + except Exception: + f = lambda x: x.std(ddof=ddof, **kwargs) + with _group_selection_context(self): + return self._python_agg_general(f) + else: f = lambda x: x.std(ddof=ddof, **kwargs) - return self._python_agg_general(f) + with _group_selection_context(self): + return self._python_agg_general(f) @Substitution(name='groupby') @Appender(_common_see_also) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e6b7577d97bad..559804ea438ce 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -346,6 +346,10 @@ def get_group_levels(self): 'name': 'group_median' }, 'var': 'group_var', + 'std': { + 'name' : 'group_var_bin', + 'f' : lambda func, a: np.sqrt(func(a)), + }, 'first': { 'name': 'group_nth', 'f': lambda func, a, b, c, d, e: func(a, b, c, d, 1, -1) From ef89d648f31ef1ba32d95564b83f20299ba9393c Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sun, 28 Apr 2019 13:59:02 -0400 Subject: [PATCH 08/16] PEP8 fix --- pandas/core/groupby/ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 559804ea438ce..a2e86a490f0ec 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -347,8 +347,8 @@ def get_group_levels(self): }, 'var': 'group_var', 'std': { - 'name' : 'group_var_bin', - 'f' : lambda func, a: np.sqrt(func(a)), + 'name': 
'group_var_bin', + 'f': lambda func, a: np.sqrt(func(a)), }, 'first': { 'name': 'group_nth', From 62ad090d5c1c31617550d2f9a543878e67b2a2b6 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sun, 28 Apr 2019 14:06:02 -0400 Subject: [PATCH 09/16] Refactor std and var to reduce complexity --- pandas/core/groupby/groupby.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 418fdadad1263..536d656251d1f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1190,13 +1190,11 @@ def std(self, ddof=1, *args, **kwargs): try: return self._cython_agg_general('std', **kwargs) except Exception: - f = lambda x: x.std(ddof=ddof, **kwargs) - with _group_selection_context(self): - return self._python_agg_general(f) - else: - f = lambda x: x.std(ddof=ddof, **kwargs) - with _group_selection_context(self): - return self._python_agg_general(f) + pass + + f = lambda x: x.std(ddof=ddof, **kwargs) + with _group_selection_context(self): + return self._python_agg_general(f) @Substitution(name='groupby') @Appender(_common_see_also) @@ -1216,13 +1214,11 @@ def var(self, ddof=1, *args, **kwargs): try: return self._cython_agg_general('var', **kwargs) except Exception: - f = lambda x: x.var(ddof=ddof, **kwargs) - with _group_selection_context(self): - return self._python_agg_general(f) - else: - f = lambda x: x.var(ddof=ddof, **kwargs) - with _group_selection_context(self): - return self._python_agg_general(f) + pass + + f = lambda x: x.var(ddof=ddof, **kwargs) + with _group_selection_context(self): + return self._python_agg_general(f) @Substitution(name='groupby') @Appender(_common_see_also) From 12b49c4b663bd16f0fba9f68bf70bf2a75259462 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Sun, 28 Apr 2019 15:46:23 -0400 Subject: [PATCH 10/16] Don't catch IndexError --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 536d656251d1f..d4168b643adf1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -867,7 +867,7 @@ def _python_agg_general(self, func, *args, **kwargs): try: result, counts = self.grouper.agg_series(obj, f) output[name] = self._try_cast(result, obj, numeric_only=True) - except (IndexError, TypeError): + except TypeError: continue if len(output) == 0: From 8c52aba0afa1030385853d9f9fd8eb86571f5673 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Mon, 29 Apr 2019 23:37:24 -0400 Subject: [PATCH 11/16] Begin updating test_regression_whitelist_methods --- pandas/tests/groupby/test_whitelist.py | 38 +++++++++++++++----------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 2bd2f3fb00b56..beb307b8d27b5 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -8,7 +8,7 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series, date_range +from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series, date_range from pandas.util import testing as tm AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', @@ -164,33 +164,39 @@ def raw_frame(): @pytest.mark.parametrize('axis', [0, 1]) @pytest.mark.parametrize('skipna', [True, False]) @pytest.mark.parametrize('sort', [True, False]) +@pytest.mark.parametrize('as_index', [True, False]) def test_regression_whitelist_methods( raw_frame, op, level, - axis, skipna, sort): + axis, skipna, sort, as_index): # GH6944 # GH 17537 # explicitly test the whitelist methods + if not as_index and axis == 1: + pytest.skip('as_index=False only valid for axis=0') + if axis == 0: frame = raw_frame else: frame = raw_frame.T + groupby_kwargs = {'level': level, 'axis': axis, 'sort': sort} #, 'as_index': as_index} + group_op_kwargs = {} + frame_op_kwargs = 
{'level': level, 'axis': axis} if op in AGG_FUNCTIONS_WITH_SKIPNA: - grouped = frame.groupby(level=level, axis=axis, sort=sort) - result = getattr(grouped, op)(skipna=skipna) - expected = getattr(frame, op)(level=level, axis=axis, - skipna=skipna) - if sort: - expected = expected.sort_index(axis=axis, level=level) - tm.assert_frame_equal(result, expected) - else: - grouped = frame.groupby(level=level, axis=axis, sort=sort) - result = getattr(grouped, op)() - expected = getattr(frame, op)(level=level, axis=axis) - if sort: - expected = expected.sort_index(axis=axis, level=level) - tm.assert_frame_equal(result, expected) + group_op_kwargs['skipna'] = skipna + frame_op_kwargs['skipna'] = skipna + + grouped = frame.groupby(**groupby_kwargs) + result = getattr(grouped, op)(**group_op_kwargs) + expected = getattr(frame, op)(**frame_op_kwargs) + if as_index: + pass + + if sort: + expected = expected.sort_index(axis=axis, level=level) + + tm.assert_frame_equal(result, expected) def test_groupby_blacklist(df_letters): From 34382dbfd5b9e279ea2be49029a88a5da4d6fb84 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Mon, 6 May 2019 21:14:10 -0400 Subject: [PATCH 12/16] Update test_regression_whitelist_methods, remove specialized test --- pandas/tests/groupby/test_function.py | 19 ------------------- pandas/tests/groupby/test_whitelist.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index fbda7d53ea93b..3d9bfcd126377 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -490,25 +490,6 @@ def test_ops_general(): raise -@pytest.mark.parametrize('as_index', [True, False]) -def test_groupby_std(as_index): - # GH 10355: Test that std does not affect the groupby column - df = pd.DataFrame({ - 'a': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'b': [1, 2, 3, 3, 5, 7, 7, 8, 9], - }) - result = df.groupby('a', as_index=as_index).std() - expected = 
pd.DataFrame({ - 'a': [1, 2, 3], - 'b': [1, 2, 1] - }) - - if as_index: - expected = expected.set_index('a') - - tm.assert_frame_equal(result, expected) - - def test_max_nan_bug(): raw = """,Date,app,File -04-23,2013-04-23 00:00:00,,log080001.log diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index beb307b8d27b5..80f2ac8f4c26a 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -180,7 +180,7 @@ def test_regression_whitelist_methods( else: frame = raw_frame.T - groupby_kwargs = {'level': level, 'axis': axis, 'sort': sort} #, 'as_index': as_index} + groupby_kwargs = {'level': level, 'axis': axis, 'sort': sort, 'as_index': as_index} group_op_kwargs = {} frame_op_kwargs = {'level': level, 'axis': axis} if op in AGG_FUNCTIONS_WITH_SKIPNA: @@ -190,12 +190,17 @@ def test_regression_whitelist_methods( grouped = frame.groupby(**groupby_kwargs) result = getattr(grouped, op)(**group_op_kwargs) expected = getattr(frame, op)(**frame_op_kwargs) - if as_index: - pass if sort: expected = expected.sort_index(axis=axis, level=level) + if not as_index: + expected = expected.reset_index() + if level == 0: + expected = expected.drop(columns=['first']) + if level == 1: + expected = expected.drop(columns=['second']) + tm.assert_frame_equal(result, expected) From 42ad605cfff3655bdb5fec198ee3cf49c6886ee0 Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Mon, 6 May 2019 21:35:59 -0400 Subject: [PATCH 13/16] Resolve PEP8 issue --- pandas/tests/groupby/test_whitelist.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 80f2ac8f4c26a..50526c5541a37 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -8,7 +8,7 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series, date_range +from pandas import 
DataFrame, Index, MultiIndex, Series, date_range from pandas.util import testing as tm AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', @@ -180,7 +180,8 @@ def test_regression_whitelist_methods( else: frame = raw_frame.T - groupby_kwargs = {'level': level, 'axis': axis, 'sort': sort, 'as_index': as_index} + groupby_kwargs = {'level': level, 'axis': axis, 'sort': sort, + 'as_index': as_index} group_op_kwargs = {} frame_op_kwargs = {'level': level, 'axis': axis} if op in AGG_FUNCTIONS_WITH_SKIPNA: From 71e1fb8229c04a4216bce37d66e4dec63581205a Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Mon, 6 May 2019 22:47:41 -0400 Subject: [PATCH 14/16] Make test code clearer --- pandas/tests/groupby/test_whitelist.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 50526c5541a37..90748c0fd865c 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -180,17 +180,15 @@ def test_regression_whitelist_methods( else: frame = raw_frame.T - groupby_kwargs = {'level': level, 'axis': axis, 'sort': sort, - 'as_index': as_index} - group_op_kwargs = {} - frame_op_kwargs = {'level': level, 'axis': axis} - if op in AGG_FUNCTIONS_WITH_SKIPNA: - group_op_kwargs['skipna'] = skipna - frame_op_kwargs['skipna'] = skipna + grouped = frame.groupby(level=level, axis=axis, sort=sort, + as_index=as_index) - grouped = frame.groupby(**groupby_kwargs) - result = getattr(grouped, op)(**group_op_kwargs) - expected = getattr(frame, op)(**frame_op_kwargs) + if op in AGG_FUNCTIONS_WITH_SKIPNA: + result = getattr(grouped, op)(skipna=skipna) + expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + else: + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) if sort: expected = expected.sort_index(axis=axis, level=level) From ee5ba0001f6a60d8735743f080185bf46d8f789e Mon Sep 17 
00:00:00 2001 From: Alex Watt Date: Thu, 16 May 2019 23:57:28 -0400 Subject: [PATCH 15/16] Switch to fixture for axis, add appropriate pytest.skips --- pandas/tests/groupby/test_whitelist.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 90748c0fd865c..ed3915ffddb43 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -161,7 +161,6 @@ def raw_frame(): @pytest.mark.parametrize('op', AGG_FUNCTIONS) @pytest.mark.parametrize('level', [0, 1]) -@pytest.mark.parametrize('axis', [0, 1]) @pytest.mark.parametrize('skipna', [True, False]) @pytest.mark.parametrize('sort', [True, False]) @pytest.mark.parametrize('as_index', [True, False]) @@ -172,14 +171,17 @@ def test_regression_whitelist_methods( # GH 17537 # explicitly test the whitelist methods - if not as_index and axis == 1: + if not as_index and axis not in [0, 'index']: pytest.skip('as_index=False only valid for axis=0') - if axis == 0: + if axis in [0, 'index']: frame = raw_frame else: frame = raw_frame.T + if not isinstance(frame.index, MultiIndex) and (level > 0 or level < -1): + pytest.skip('level > 0 or level < -1 only valid with MultiIndex') + grouped = frame.groupby(level=level, axis=axis, sort=sort, as_index=as_index) From 83847b6c4441b3598799f9ffb1f659cb7acb636f Mon Sep 17 00:00:00 2001 From: Alex Watt Date: Fri, 17 May 2019 09:54:28 -0400 Subject: [PATCH 16/16] Handle case of no bins in reduction.pyx --- pandas/_libs/reduction.pyx | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 6c69f7669bee5..c491927711059 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -238,12 +238,15 @@ cdef class SeriesBinGrouper: counts = np.zeros(self.ngroups, dtype=np.int64) if self.ngroups > 0: - counts[0] = self.bins[0] - for i in range(1, self.ngroups): - 
if i == self.ngroups - 1: - counts[i] = len(self.arr) - self.bins[i - 1] - else: - counts[i] = self.bins[i] - self.bins[i - 1] + if len(self.bins) == 0: + return np.empty(0, dtype='O'), counts + else: + counts[0] = self.bins[0] + for i in range(1, self.ngroups): + if i == self.ngroups - 1: + counts[i] = len(self.arr) - self.bins[i - 1] + else: + counts[i] = self.bins[i] - self.bins[i - 1] group_size = 0 n = len(self.arr)