Skip to content

Commit 03ac0bf

Browse files
committed
Merge pull request #4887 from cpcloud/groupby-blacklist
API: disable to_csv and friends on GroupBy objects
2 parents 8a9a4f2 + b709389 commit 03ac0bf

File tree

3 files changed

+56
-3
lines changed

3 files changed

+56
-3
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ API Changes
243243
- Remove deprecated ``Factor`` (:issue:`3650`)
244244
- Remove deprecated ``set_printoptions/reset_printoptions`` (:issue:`3046`)
245245
- Remove deprecated ``_verbose_info`` (:issue:`3215`)
246+
- Begin removing methods that don't make sense on ``GroupBy`` objects
247+
(:issue:`4887`).
246248

247249
Internal Refactoring
248250
~~~~~~~~~~~~~~~~~~~~

pandas/core/groupby.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@
4545
"""
4646

4747

48+
_apply_whitelist = frozenset(['last', 'first',
49+
'mean', 'sum', 'min', 'max',
50+
'head', 'tail',
51+
'cumsum', 'cumprod', 'cummin', 'cummax',
52+
'resample',
53+
'describe',
54+
'rank', 'quantile', 'count',
55+
'fillna', 'dtype'])
56+
57+
4858
class GroupByError(Exception):
4959
pass
5060

@@ -241,13 +251,21 @@ def __getattr__(self, attr):
241251
if hasattr(self.obj, attr) and attr != '_cache':
242252
return self._make_wrapper(attr)
243253

244-
raise AttributeError("'%s' object has no attribute '%s'" %
254+
raise AttributeError("%r object has no attribute %r" %
245255
(type(self).__name__, attr))
246256

247257
def __getitem__(self, key):
248258
raise NotImplementedError
249259

250260
def _make_wrapper(self, name):
261+
if name not in _apply_whitelist:
262+
is_callable = callable(getattr(self.obj, name, None))
263+
kind = ' callable ' if is_callable else ' '
264+
msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try "
265+
"using the 'apply' method".format(kind, name,
266+
type(self).__name__))
267+
raise AttributeError(msg)
268+
251269
f = getattr(self.obj, name)
252270
if not isinstance(f, types.MethodType):
253271
return self.apply(lambda self: getattr(self, name))

pandas/tests/test_groupby.py

+35-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from pandas.core.index import Index, MultiIndex
1010
from pandas.core.common import rands
1111
from pandas.core.api import Categorical, DataFrame
12-
from pandas.core.groupby import GroupByError, SpecificationError, DataError
12+
from pandas.core.groupby import (GroupByError, SpecificationError, DataError,
13+
_apply_whitelist)
1314
from pandas.core.series import Series
1415
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
1516
assert_series_equal, assert_almost_equal,
@@ -2696,8 +2697,40 @@ def test_filter_against_workaround(self):
26962697
new_way = grouped.filter(lambda x: x['ints'].mean() > N/20)
26972698
assert_frame_equal(new_way.sort_index(), old_way.sort_index())
26982699

2700+
def test_groupby_whitelist(self):
2701+
from string import ascii_lowercase
2702+
letters = np.array(list(ascii_lowercase))
2703+
N = 10
2704+
random_letters = letters.take(np.random.randint(0, 26, N))
2705+
df = DataFrame({'floats': N / 10 * Series(np.random.random(N)),
2706+
'letters': Series(random_letters)})
2707+
s = df.floats
2708+
2709+
blacklist = ['eval', 'query', 'abs', 'shift', 'tshift', 'where',
2710+
'mask', 'align', 'groupby', 'clip', 'astype',
2711+
'at', 'combine', 'consolidate', 'convert_objects',
2712+
'corr', 'corr_with', 'cov']
2713+
to_methods = [method for method in dir(df) if method.startswith('to_')]
2714+
2715+
blacklist.extend(to_methods)
2716+
2717+
# e.g., to_csv
2718+
defined_but_not_allowed = ("(?:^Cannot.+{0!r}.+{1!r}.+try using the "
2719+
"'apply' method$)")
2720+
2721+
# e.g., query, eval
2722+
not_defined = "(?:^{1!r} object has no attribute {0!r}$)"
2723+
fmt = defined_but_not_allowed + '|' + not_defined
2724+
for bl in blacklist:
2725+
for obj in (df, s):
2726+
gb = obj.groupby(df.letters)
2727+
msg = fmt.format(bl, type(gb).__name__)
2728+
with tm.assertRaisesRegexp(AttributeError, msg):
2729+
getattr(gb, bl)
2730+
2731+
26992732
def assert_fp_equal(a, b):
2700-
assert((np.abs(a - b) < 1e-12).all())
2733+
assert (np.abs(a - b) < 1e-12).all()
27012734

27022735

27032736
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):

0 commit comments

Comments
 (0)