Skip to content

Commit 31fb39a

Browse files
authored
CLN: reorg groupby to multiple modules (pandas-dev#21820)
1 parent 5d0daa0 commit 31fb39a

18 files changed

+3482
-3328
lines changed

pandas/core/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pandas.core.algorithms import factorize, unique, value_counts
88
from pandas.core.dtypes.missing import isna, isnull, notna, notnull
99
from pandas.core.arrays import Categorical
10-
from pandas.core.groupby.groupby import Grouper
10+
from pandas.core.groupby import Grouper
1111
from pandas.io.formats.format import set_eng_float_format
1212
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
1313
UInt64Index, RangeIndex, Float64Index,

pandas/core/base.py

-47
Original file line numberDiff line numberDiff line change
@@ -648,53 +648,6 @@ def _is_builtin_func(self, arg):
648648
return self._builtin_table.get(arg, arg)
649649

650650

651-
class GroupByMixin(object):
652-
""" provide the groupby facilities to the mixed object """
653-
654-
@staticmethod
655-
def _dispatch(name, *args, **kwargs):
656-
""" dispatch to apply """
657-
658-
def outer(self, *args, **kwargs):
659-
def f(x):
660-
x = self._shallow_copy(x, groupby=self._groupby)
661-
return getattr(x, name)(*args, **kwargs)
662-
return self._groupby.apply(f)
663-
outer.__name__ = name
664-
return outer
665-
666-
def _gotitem(self, key, ndim, subset=None):
667-
"""
668-
sub-classes to define
669-
return a sliced object
670-
671-
Parameters
672-
----------
673-
key : string / list of selections
674-
ndim : 1,2
675-
requested ndim of result
676-
subset : object, default None
677-
subset to act on
678-
"""
679-
# create a new object to prevent aliasing
680-
if subset is None:
681-
subset = self.obj
682-
683-
# we need to make a shallow copy of ourselves
684-
# with the same groupby
685-
kwargs = dict([(attr, getattr(self, attr))
686-
for attr in self._attributes])
687-
self = self.__class__(subset,
688-
groupby=self._groupby[key],
689-
parent=self,
690-
**kwargs)
691-
self._reset_cache()
692-
if subset.ndim == 2:
693-
if is_scalar(key) and key in subset or is_list_like(key):
694-
self._selection = key
695-
return self
696-
697-
698651
class IndexOpsMixin(object):
699652
""" common ops mixin to support a unified interface / docs for Series /
700653
Index

pandas/core/groupby/__init__.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# flake8: noqa
2-
from pandas.core.groupby.groupby import (
3-
Grouper, GroupBy, SeriesGroupBy, DataFrameGroupBy
4-
)
1+
from pandas.core.groupby.groupby import GroupBy # flake8: noqa
2+
from pandas.core.groupby.generic import (
3+
SeriesGroupBy, DataFrameGroupBy, PanelGroupBy) # flake8: noqa
4+
from pandas.core.groupby.grouper import Grouper # flake8: noqa

pandas/core/groupby/base.py

+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""
2+
Provide basic components for groupby. These definitions
3+
hold the whitelist of methods that are exposed on the
4+
SeriesGroupBy and the DataFrameGroupBy objects.
5+
"""
6+
7+
import types
8+
from pandas.util._decorators import make_signature
9+
from pandas.core.dtypes.common import is_scalar, is_list_like
10+
11+
12+
class GroupByMixin(object):
13+
""" provide the groupby facilities to the mixed object """
14+
15+
@staticmethod
16+
def _dispatch(name, *args, **kwargs):
17+
""" dispatch to apply """
18+
19+
def outer(self, *args, **kwargs):
20+
def f(x):
21+
x = self._shallow_copy(x, groupby=self._groupby)
22+
return getattr(x, name)(*args, **kwargs)
23+
return self._groupby.apply(f)
24+
outer.__name__ = name
25+
return outer
26+
27+
def _gotitem(self, key, ndim, subset=None):
28+
"""
29+
sub-classes to define
30+
return a sliced object
31+
32+
Parameters
33+
----------
34+
key : string / list of selections
35+
ndim : 1,2
36+
requested ndim of result
37+
subset : object, default None
38+
subset to act on
39+
"""
40+
# create a new object to prevent aliasing
41+
if subset is None:
42+
subset = self.obj
43+
44+
# we need to make a shallow copy of ourselves
45+
# with the same groupby
46+
kwargs = dict([(attr, getattr(self, attr))
47+
for attr in self._attributes])
48+
self = self.__class__(subset,
49+
groupby=self._groupby[key],
50+
parent=self,
51+
**kwargs)
52+
self._reset_cache()
53+
if subset.ndim == 2:
54+
if is_scalar(key) and key in subset or is_list_like(key):
55+
self._selection = key
56+
return self
57+
58+
59+
# special case to prevent duplicate plots when catching exceptions when
60+
# forwarding methods from NDFrames
61+
plotting_methods = frozenset(['plot', 'boxplot', 'hist'])
62+
63+
common_apply_whitelist = frozenset([
64+
'last', 'first',
65+
'head', 'tail', 'median',
66+
'mean', 'sum', 'min', 'max',
67+
'cumcount', 'ngroup',
68+
'resample',
69+
'rank', 'quantile',
70+
'fillna',
71+
'mad',
72+
'any', 'all',
73+
'take',
74+
'idxmax', 'idxmin',
75+
'shift', 'tshift',
76+
'ffill', 'bfill',
77+
'pct_change', 'skew',
78+
'corr', 'cov', 'diff',
79+
]) | plotting_methods
80+
81+
series_apply_whitelist = ((common_apply_whitelist |
82+
{'nlargest', 'nsmallest',
83+
'is_monotonic_increasing',
84+
'is_monotonic_decreasing'}) -
85+
{'boxplot'}) | frozenset(['dtype', 'unique'])
86+
87+
dataframe_apply_whitelist = ((common_apply_whitelist |
88+
frozenset(['dtypes', 'corrwith'])) -
89+
{'boxplot'})
90+
91+
cython_transforms = frozenset(['cumprod', 'cumsum', 'shift',
92+
'cummin', 'cummax'])
93+
94+
cython_cast_blacklist = frozenset(['rank', 'count', 'size'])
95+
96+
97+
def whitelist_method_generator(base, klass, whitelist):
98+
"""
99+
Yields all GroupBy member defs for DataFrame/Series names in whitelist.
100+
101+
Parameters
102+
----------
103+
base : class
104+
base class
105+
klass : class
106+
class where members are defined.
107+
Should be Series or DataFrame
108+
whitelist : list
109+
list of names of klass methods to be constructed
110+
111+
Returns
112+
-------
113+
The generator yields a sequence of strings, each suitable for exec'ing,
114+
that define implementations of the named methods for DataFrameGroupBy
115+
or SeriesGroupBy.
116+
117+
Since we don't want to override methods explicitly defined in the
118+
base class, any such name is skipped.
119+
"""
120+
121+
method_wrapper_template = \
122+
"""def %(name)s(%(sig)s) :
123+
\"""
124+
%(doc)s
125+
\"""
126+
f = %(self)s.__getattr__('%(name)s')
127+
return f(%(args)s)"""
128+
property_wrapper_template = \
129+
"""@property
130+
def %(name)s(self) :
131+
\"""
132+
%(doc)s
133+
\"""
134+
return self.__getattr__('%(name)s')"""
135+
136+
for name in whitelist:
137+
# don't override anything that was explicitly defined
138+
# in the base class
139+
if hasattr(base, name):
140+
continue
141+
# ugly, but we need the name string itself in the method.
142+
f = getattr(klass, name)
143+
doc = f.__doc__
144+
doc = doc if type(doc) == str else ''
145+
if isinstance(f, types.MethodType):
146+
wrapper_template = method_wrapper_template
147+
decl, args = make_signature(f)
148+
# pass args by name to f because otherwise
149+
# GroupBy._make_wrapper won't know whether
150+
# we passed in an axis parameter.
151+
args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
152+
params = {'name': name,
153+
'doc': doc,
154+
'sig': ','.join(decl),
155+
'self': args[0],
156+
'args': ','.join(args_by_name)}
157+
else:
158+
wrapper_template = property_wrapper_template
159+
params = {'name': name, 'doc': doc}
160+
yield wrapper_template % params

0 commit comments

Comments
 (0)