From b709389786bd7a7a5907159d554c4424166a744c Mon Sep 17 00:00:00 2001
From: Phillip Cloud <cpcloud@gmail.com>
Date: Thu, 19 Sep 2013 14:58:57 -0400
Subject: [PATCH] API: add whitelist for methods that make sense on GroupBy
 objects

---
 doc/source/release.rst       |  2 ++
 pandas/core/groupby.py       | 20 ++++++++++++++++++-
 pandas/tests/test_groupby.py | 37 ++++++++++++++++++++++++++++++++++--
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 74e54526cfe9a..e49812b207921 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -243,6 +243,8 @@ API Changes
   - Remove deprecated ``Factor`` (:issue:`3650`)
   - Remove deprecated ``set_printoptions/reset_printoptions`` (:issue:``3046``)
   - Remove deprecated ``_verbose_info`` (:issue:`3215`)
+  - Begin removing methods that don't make sense on ``GroupBy`` objects
+    (:issue:`4887`).
 
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 186277777abe8..2e07662bffbfe 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -45,6 +45,16 @@
 """
 
 
+# Names _make_wrapper may forward; any other name raises AttributeError.
+_apply_whitelist = frozenset([
+    'first', 'last', 'head', 'tail',
+    'sum', 'mean', 'min', 'max', 'count',
+    'cumsum', 'cumprod', 'cummin', 'cummax',
+    'rank', 'quantile', 'describe',
+    'resample', 'fillna', 'dtype',
+])
+
+
 class GroupByError(Exception):
     pass
 
@@ -241,13 +251,21 @@ def __getattr__(self, attr):
         if hasattr(self.obj, attr) and attr != '_cache':
             return self._make_wrapper(attr)
 
-        raise AttributeError("'%s' object has no attribute '%s'" %
+        raise AttributeError("%r object has no attribute %r" %
                              (type(self).__name__, attr))
 
     def __getitem__(self, key):
         raise NotImplementedError
 
     def _make_wrapper(self, name):
+        if name not in _apply_whitelist:
+            is_callable = callable(getattr(self.obj, name, None))
+            kind = ' callable ' if is_callable else ' '
+            msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try "
+                   "using the 'apply' method".format(kind, name,
+                                                     type(self).__name__))
+            raise AttributeError(msg)
+
         f = getattr(self.obj, name)
         if not isinstance(f, types.MethodType):
             return self.apply(lambda self: getattr(self, name))
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 02eb4015c133f..46ab0fe022e78 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -9,7 +9,8 @@
 from pandas.core.index import Index, MultiIndex
 from pandas.core.common import rands
 from pandas.core.api import Categorical, DataFrame
-from pandas.core.groupby import GroupByError, SpecificationError, DataError
+from pandas.core.groupby import (GroupByError, SpecificationError, DataError,
+                                 _apply_whitelist)
 from pandas.core.series import Series
 from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
                                  assert_series_equal, assert_almost_equal,
@@ -2696,8 +2697,40 @@ def test_filter_against_workaround(self):
         new_way = grouped.filter(lambda x: x['ints'].mean() > N/20)
         assert_frame_equal(new_way.sort_index(), old_way.sort_index())
 
+    def test_groupby_whitelist(self):
+        """Attributes outside the whitelist raise AttributeError."""
+        from string import ascii_lowercase
+        letters = np.array(list(ascii_lowercase))
+        N = 10
+        random_letters = letters.take(np.random.randint(0, 26, N))
+        df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
+                        'letters': Series(random_letters)})
+        s = df.floats
+
+        blacklist = ['eval', 'query', 'abs', 'shift', 'tshift', 'where',
+                     'mask', 'align', 'groupby', 'clip', 'astype',
+                     'at', 'combine', 'consolidate', 'convert_objects',
+                     'corr', 'corrwith', 'cov']
+        to_methods = [method for method in dir(df) if method.startswith('to_')]
+        blacklist.extend(to_methods)
+
+        # attribute exists but is not whitelisted, e.g., to_csv
+        defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the "
+                                   "'apply' method$)")
+
+        # attribute missing on the wrapped object, e.g., query, eval
+        not_defined = "(?:^{1!r} object has no attribute {0!r}$)"
+        fmt = defined_but_not_allowed + '|' + not_defined
+        for bl in blacklist:
+            for obj in (df, s):
+                gb = obj.groupby(df.letters)
+                msg = fmt.format(bl, type(gb).__name__)
+                with tm.assertRaisesRegexp(AttributeError, msg):
+                    getattr(gb, bl)
+
+
 def assert_fp_equal(a, b):
-    assert((np.abs(a - b) < 1e-12).all())
+    assert (np.abs(a - b) < 1e-12).all()
 
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):