From 1d99270e659b0ceba51b22340d6d321439eccef1 Mon Sep 17 00:00:00 2001 From: Aaron Staple Date: Mon, 20 Oct 2014 20:10:47 -0700 Subject: [PATCH] Raise error in certain unimplemented aggregation cases. --- doc/source/whatsnew/v0.15.1.txt | 2 ++ pandas/core/categorical.py | 4 ++-- pandas/core/frame.py | 8 ++++---- pandas/core/generic.py | 7 +++---- pandas/core/panel.py | 8 ++++++-- pandas/core/series.py | 14 ++++++++++---- pandas/sparse/series.py | 4 ++-- pandas/tests/test_groupby.py | 6 +++--- pandas/tests/test_panel.py | 6 ++++++ pandas/tests/test_series.py | 9 +++++++++ 10 files changed, 47 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt index dc69bd9f55752..7d5ce1952e7b2 100644 --- a/doc/source/whatsnew/v0.15.1.txt +++ b/doc/source/whatsnew/v0.15.1.txt @@ -30,6 +30,8 @@ Enhancements - Qualify memory usage in ``DataFrame.info()`` by adding ``+`` if it is a lower bound (:issue:`8578`) +- Raise errors in certain aggregation cases where an argument such as ``numeric_only`` is not handled (:issue:`8592`). + .. _whatsnew_0151.performance: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index b35cfdcf7c8f1..d343dccacb6b8 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1210,8 +1210,8 @@ def __setitem__(self, key, value): self._codes[key] = lindexer #### reduction ops #### - def _reduce(self, op, axis=0, skipna=True, numeric_only=None, - filter_type=None, name=None, **kwds): + def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): """ perform the reduction type operation """ func = getattr(self,name,None) if func is None: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d90ef76ddfa5e..c1b92147612aa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4129,7 +4129,7 @@ def any(self, axis=None, bool_only=None, skipna=True, level=None, if level is not None: return self._agg_by_level('any', axis=axis, level=level, skipna=skipna) - return self._reduce(nanops.nanany, axis=axis, skipna=skipna, + return self._reduce(nanops.nanany, 'any', axis=axis, skipna=skipna, numeric_only=bool_only, filter_type='bool') def all(self, axis=None, bool_only=None, skipna=True, level=None, @@ -4160,11 +4160,11 @@ def all(self, axis=None, bool_only=None, skipna=True, level=None, if level is not None: return self._agg_by_level('all', axis=axis, level=level, skipna=skipna) - return self._reduce(nanops.nanall, axis=axis, skipna=skipna, + return self._reduce(nanops.nanall, 'all', axis=axis, skipna=skipna, numeric_only=bool_only, filter_type='bool') - def _reduce(self, op, axis=0, skipna=True, numeric_only=None, - filter_type=None, name=None, **kwds): + def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): axis = self._get_axis_number(axis) f = lambda x: op(x, axis=axis, skipna=skipna, **kwds) labels = self._get_agg_axis(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 53abfe10fe8ea..71668a73d9286 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3934,9 +3934,8 @@ def stat_func(self, axis=None, skipna=None, level=None, if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) - return self._reduce(f, axis=axis, - skipna=skipna, numeric_only=numeric_only, - name=name) + return self._reduce(f, name, axis=axis, + skipna=skipna, numeric_only=numeric_only) stat_func.__name__ = name return stat_func @@ -4005,7 +4004,7 @@ def stat_func(self, axis=None, skipna=None, level=None, ddof=1, if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna, ddof=ddof) - return self._reduce(f, axis=axis, + return self._reduce(f, name, axis=axis, skipna=skipna, ddof=ddof) stat_func.__name__ = name return stat_func diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 686a0c4f6cca4..72f9c5bd00cb7 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1045,8 +1045,12 @@ def _apply_2d(self, func, axis): return self._construct_return_type(dict(results)) - def _reduce(self, op, axis=0, skipna=True, numeric_only=None, - filter_type=None, name=None, **kwds): + def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): + if numeric_only: + raise NotImplementedError( + 'Panel.{0} does not implement numeric_only.'.format(name)) + axis_name = self._get_axis_name(axis) axis_number = self._get_axis_number(axis_name) f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0408d62ce302c..f5d729b61e770 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2056,8 +2056,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): return self._constructor(mapped, index=self.index).__finalize__(self) - def _reduce(self, op, axis=0, skipna=True, numeric_only=None, - filter_type=None, name=None, **kwds): + def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): """ perform a reduction operation @@ -2067,10 +2067,16 @@ def _reduce(self, op, axis=0, skipna=True, numeric_only=None, """ delegate = self.values if isinstance(delegate, np.ndarray): + # Validate that 'axis' is consistent with Series's single axis. + self._get_axis_number(axis) + if numeric_only: + raise NotImplementedError( + 'Series.{0} does not implement numeric_only.'.format(name)) return op(delegate, skipna=skipna, **kwds) - return delegate._reduce(op=op, axis=axis, skipna=skipna, numeric_only=numeric_only, - filter_type=filter_type, name=name, **kwds) + return delegate._reduce(op=op, name=name, axis=axis, skipna=skipna, + numeric_only=numeric_only, + filter_type=filter_type, **kwds) def _maybe_box(self, func, dropna=False): """ diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index bb428b7e4c6bb..39d286f3744e1 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -303,8 +303,8 @@ def __array_finalize__(self, obj): self.name = getattr(obj, 'name', None) self.fill_value = getattr(obj, 'fill_value', None) - def _reduce(self, op, axis=0, skipna=True, numeric_only=None, - filter_type=None, name=None, **kwds): + def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, + filter_type=None, **kwds): """ perform a reduction operation """ return op(self.get_values(), skipna=skipna, **kwds) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 7ead8b30e8671..27171984f2873 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1669,11 +1669,11 @@ def test_groupby_multiple_key(self): lambda x: x.month, lambda x: x.day], axis=1) - agged = grouped.agg(lambda x: x.sum(1)) + agged = grouped.agg(lambda x: x.sum()) self.assertTrue(agged.index.equals(df.columns)) assert_almost_equal(df.T.values, agged.values) - agged = grouped.agg(lambda x: x.sum(1)) + agged = grouped.agg(lambda x: x.sum()) assert_almost_equal(df.T.values, agged.values) def test_groupby_multi_corner(self): @@ -1708,7 +1708,7 @@ def test_omit_nuisance(self): # won't work with axis = 1 grouped = df.groupby({'A': 0, 'C': 0, 'D': 1, 'E': 1}, axis=1) result = self.assertRaises(TypeError, grouped.agg, - lambda x: x.sum(1, numeric_only=False)) + lambda x: x.sum(0, numeric_only=False)) def test_omit_nuisance_python_multiple(self): grouped = self.three_group.groupby(['A', 'B']) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 736cdf312b361..14e4e32acae9f 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,6 +1,7 @@ # pylint: disable=W0612,E1101 from datetime import datetime +from inspect import getargspec import operator import nose @@ -169,6 +170,11 @@ def wrapper(x): self.assertRaises(Exception, f, axis=obj.ndim) + # Unimplemented numeric_only parameter. + if 'numeric_only' in getargspec(f).args: + self.assertRaisesRegexp(NotImplementedError, name, f, + numeric_only=True) + class SafeForSparse(object): _multiprocess_can_split_ = True diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 2d3961a643991..68590e1597bbc 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta import operator import string +from inspect import getargspec from itertools import product, starmap from distutils.version import LooseVersion @@ -2338,6 +2339,14 @@ def testit(): exp = alternate(s) self.assertEqual(res, exp) + # Invalid axis. + self.assertRaises(ValueError, f, self.series, axis=1) + + # Unimplemented numeric_only parameter. + if 'numeric_only' in getargspec(f).args: + self.assertRaisesRegexp(NotImplementedError, name, f, + self.series, numeric_only=True) + testit() try: