From b709389786bd7a7a5907159d554c4424166a744c Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 19 Sep 2013 14:58:57 -0400 Subject: [PATCH] API: add whitelist for methods that make sense on GroupBy objects --- doc/source/release.rst | 2 ++ pandas/core/groupby.py | 20 ++++++++++++++++++- pandas/tests/test_groupby.py | 37 ++++++++++++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 74e54526cfe9a..e49812b207921 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -243,6 +243,8 @@ API Changes - Remove deprecated ``Factor`` (:issue:`3650`) - Remove deprecated ``set_printoptions/reset_printoptions`` (:issue:``3046``) - Remove deprecated ``_verbose_info`` (:issue:`3215`) + - Begin removing methods that don't make sense on ``GroupBy`` objects + (:issue:`4887`). Internal Refactoring ~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 186277777abe8..2e07662bffbfe 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -45,6 +45,16 @@ """ +_apply_whitelist = frozenset(['last', 'first', + 'mean', 'sum', 'min', 'max', + 'head', 'tail', + 'cumsum', 'cumprod', 'cummin', 'cummax', + 'resample', + 'describe', + 'rank', 'quantile', 'count', + 'fillna', 'dtype']) + + class GroupByError(Exception): pass @@ -241,13 +251,21 @@ def __getattr__(self, attr): if hasattr(self.obj, attr) and attr != '_cache': return self._make_wrapper(attr) - raise AttributeError("'%s' object has no attribute '%s'" % + raise AttributeError("%r object has no attribute %r" % (type(self).__name__, attr)) def __getitem__(self, key): raise NotImplementedError def _make_wrapper(self, name): + if name not in _apply_whitelist: + is_callable = callable(getattr(self.obj, name, None)) + kind = ' callable ' if is_callable else ' ' + msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try " + "using the 'apply' method".format(kind, name, 
type(self).__name__)) + raise AttributeError(msg) + f = getattr(self.obj, name) if not isinstance(f, types.MethodType): return self.apply(lambda self: getattr(self, name)) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 02eb4015c133f..46ab0fe022e78 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -9,7 +9,8 @@ from pandas.core.index import Index, MultiIndex from pandas.core.common import rands from pandas.core.api import Categorical, DataFrame -from pandas.core.groupby import GroupByError, SpecificationError, DataError +from pandas.core.groupby import (GroupByError, SpecificationError, DataError, + _apply_whitelist) from pandas.core.series import Series from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, @@ -2696,8 +2697,40 @@ def test_filter_against_workaround(self): new_way = grouped.filter(lambda x: x['ints'].mean() > N/20) assert_frame_equal(new_way.sort_index(), old_way.sort_index()) + def test_groupby_whitelist(self): + from string import ascii_lowercase + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame({'floats': N / 10 * Series(np.random.random(N)), + 'letters': Series(random_letters)}) + s = df.floats + + blacklist = ['eval', 'query', 'abs', 'shift', 'tshift', 'where', + 'mask', 'align', 'groupby', 'clip', 'astype', + 'at', 'combine', 'consolidate', 'convert_objects', + 'corr', 'corr_with', 'cov'] + to_methods = [method for method in dir(df) if method.startswith('to_')] + + blacklist.extend(to_methods) + + # e.g., to_csv + defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the " + "'apply' method$)") + + # e.g., query, eval + not_defined = "(?:^{1!r} object has no attribute {0!r}$)" + fmt = defined_but_not_allowed + '|' + not_defined + for bl in blacklist: + for obj in (df, s): + gb = obj.groupby(df.letters) + msg = fmt.format(bl, 
type(gb).__name__) + with tm.assertRaisesRegexp(AttributeError, msg): + getattr(gb, bl) + + def assert_fp_equal(a, b): - assert((np.abs(a - b) < 1e-12).all()) + assert (np.abs(a - b) < 1e-12).all() def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):