Skip to content

CLN: reorg groupby to multiple modules #21820

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas.core.algorithms import factorize, unique, value_counts
from pandas.core.dtypes.missing import isna, isnull, notna, notnull
from pandas.core.arrays import Categorical
from pandas.core.groupby.groupby import Grouper
from pandas.core.groupby import Grouper
from pandas.io.formats.format import set_eng_float_format
from pandas.core.index import (Index, CategoricalIndex, Int64Index,
UInt64Index, RangeIndex, Float64Index,
Expand Down
47 changes: 0 additions & 47 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,53 +648,6 @@ def _is_builtin_func(self, arg):
return self._builtin_table.get(arg, arg)


class GroupByMixin(object):
    """
    Provide the groupby facilities to the mixed object.

    The methods here read ``self._groupby``, ``self.obj``,
    ``self._attributes``, ``self._shallow_copy`` and ``self._reset_cache``;
    none of them is defined on this mixin, so the class it is mixed into
    has to supply them.
    """

    @staticmethod
    def _dispatch(name, *args, **kwargs):
        """
        Build a method that dispatches ``name`` to every group via apply.

        Parameters
        ----------
        name : string
            name of the method looked up on the per-group object; it is
            also assigned as the ``__name__`` of the returned function
        *args, **kwargs
            accepted but not used when building the wrapper; the arguments
            forwarded to the per-group method are the ones given when the
            returned function is called

        Returns
        -------
        function
            takes ``self`` plus arbitrary arguments and returns
            ``self._groupby.apply(f)``
        """

        def outer(self, *args, **kwargs):
            def f(x):
                # wrap each group in a copy of ourselves, then call the
                # requested method on that copy
                x = self._shallow_copy(x, groupby=self._groupby)
                return getattr(x, name)(*args, **kwargs)
            return self._groupby.apply(f)
        outer.__name__ = name
        return outer

    def _gotitem(self, key, ndim, subset=None):
        """
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : string / list of selections
        ndim : 1,2
            requested ndim of result (not read by this implementation)
        subset : object, default None
            subset to act on; ``self.obj`` is used when None

        Returns
        -------
        A new instance of ``self.__class__`` built on ``subset`` with
        ``self._groupby[key]`` as its groupby and ``self`` as its parent.
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj

        # we need to make a shallow copy of ourselves
        # with the same groupby
        kwargs = {attr: getattr(self, attr) for attr in self._attributes}
        sliced = self.__class__(subset,
                                groupby=self._groupby[key],
                                parent=self,
                                **kwargs)
        sliced._reset_cache()
        if subset.ndim == 2:
            # record the selection for a scalar key that is present in
            # the subset, or for any list-like key
            if (is_scalar(key) and key in subset) or is_list_like(key):
                sliced._selection = key
        return sliced


class IndexOpsMixin(object):
""" common ops mixin to support a unified interface / docs for Series /
Index
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/groupby/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# flake8: noqa
from pandas.core.groupby.groupby import (
Grouper, GroupBy, SeriesGroupBy, DataFrameGroupBy
)
from pandas.core.groupby.groupby import GroupBy # flake8: noqa
from pandas.core.groupby.generic import (
SeriesGroupBy, DataFrameGroupBy, PanelGroupBy) # flake8: noqa
from pandas.core.groupby.grouper import Grouper # flake8: noqa
160 changes: 160 additions & 0 deletions pandas/core/groupby/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""
Provide basic components for groupby. These definitions
hold the whitelist of methods that are exposed on the
SeriesGroupBy and the DataFrameGroupBy objects.
"""

import types
from pandas.util._decorators import make_signature
from pandas.core.dtypes.common import is_scalar, is_list_like


class GroupByMixin(object):
    """
    Provide the groupby facilities to the mixed object.

    The methods here read ``self._groupby``, ``self.obj``,
    ``self._attributes``, ``self._shallow_copy`` and ``self._reset_cache``;
    none of them is defined on this mixin, so the class it is mixed into
    has to supply them.
    """

    @staticmethod
    def _dispatch(name, *args, **kwargs):
        """
        Build a method that dispatches ``name`` to every group via apply.

        Parameters
        ----------
        name : string
            name of the method looked up on the per-group object; it is
            also assigned as the ``__name__`` of the returned function
        *args, **kwargs
            accepted but not used when building the wrapper; the arguments
            forwarded to the per-group method are the ones given when the
            returned function is called

        Returns
        -------
        function
            takes ``self`` plus arbitrary arguments and returns
            ``self._groupby.apply(f)``
        """

        def outer(self, *args, **kwargs):
            def f(x):
                # wrap each group in a copy of ourselves, then call the
                # requested method on that copy
                x = self._shallow_copy(x, groupby=self._groupby)
                return getattr(x, name)(*args, **kwargs)
            return self._groupby.apply(f)
        outer.__name__ = name
        return outer

    def _gotitem(self, key, ndim, subset=None):
        """
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : string / list of selections
        ndim : 1,2
            requested ndim of result (not read by this implementation)
        subset : object, default None
            subset to act on; ``self.obj`` is used when None

        Returns
        -------
        A new instance of ``self.__class__`` built on ``subset`` with
        ``self._groupby[key]`` as its groupby and ``self`` as its parent.
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj

        # we need to make a shallow copy of ourselves
        # with the same groupby
        kwargs = {attr: getattr(self, attr) for attr in self._attributes}
        sliced = self.__class__(subset,
                                groupby=self._groupby[key],
                                parent=self,
                                **kwargs)
        sliced._reset_cache()
        if subset.ndim == 2:
            # record the selection for a scalar key that is present in
            # the subset, or for any list-like key
            if (is_scalar(key) and key in subset) or is_list_like(key):
                sliced._selection = key
        return sliced


# Plotting methods are kept in their own set: they are special-cased to
# prevent duplicate plots when catching exceptions when forwarding
# methods from NDFrames.
plotting_methods = frozenset(('plot', 'boxplot', 'hist'))

# Names exposed on both SeriesGroupBy and DataFrameGroupBy.
common_apply_whitelist = frozenset((
    'last', 'first',
    'head', 'tail', 'median',
    'mean', 'sum', 'min', 'max',
    'cumcount', 'ngroup',
    'resample',
    'rank', 'quantile',
    'fillna',
    'mad',
    'any', 'all',
    'take',
    'idxmax', 'idxmin',
    'shift', 'tshift',
    'ffill', 'bfill',
    'pct_change', 'skew',
    'corr', 'cov', 'diff',
)).union(plotting_methods)

# Series: the common names plus Series-only members, without 'boxplot'.
series_apply_whitelist = (
    common_apply_whitelist
    .union(('nlargest', 'nsmallest',
            'is_monotonic_increasing',
            'is_monotonic_decreasing'))
    .difference(('boxplot',))
    .union(('dtype', 'unique'))
)

# DataFrame: the common names plus DataFrame-only members, without 'boxplot'.
dataframe_apply_whitelist = (
    common_apply_whitelist
    .union(('dtypes', 'corrwith'))
    .difference(('boxplot',))
)

cython_transforms = frozenset(('cumprod', 'cumsum', 'shift',
                               'cummin', 'cummax'))

cython_cast_blacklist = frozenset(('rank', 'count', 'size'))


def whitelist_method_generator(base, klass, whitelist):
    """
    Yields all GroupBy member defs for DataFrame/Series names in whitelist.

    Parameters
    ----------
    base : class
        base class
    klass : class
        class where members are defined.
        Should be Series or DataFrame
    whitelist : list
        list of names of klass methods to be constructed

    Yields
    ------
    string
        Source code, suitable for exec'ing, that defines an implementation
        of one named member for DataFrameGroupBy or SeriesGroupBy. A member
        for which ``getattr(klass, name)`` is a ``types.MethodType`` gets a
        method wrapper that forwards its arguments by keyword; every other
        member gets a property wrapper.

    Notes
    -----
    Since we don't want to override methods explicitly defined in the
    base class, any such name is skipped.
    """

    # The bodies of both templates are indented by four spaces so that the
    # rendered text is a valid top-level definition.
    method_wrapper_template = \
        """def %(name)s(%(sig)s) :
    \"""
    %(doc)s
    \"""
    f = %(self)s.__getattr__('%(name)s')
    return f(%(args)s)"""
    property_wrapper_template = \
        """@property
def %(name)s(self) :
    \"""
    %(doc)s
    \"""
    return self.__getattr__('%(name)s')"""

    for name in whitelist:
        # don't override anything that was explicitly defined
        # in the base class
        if hasattr(base, name):
            continue
        # ugly, but we need the name string itself in the method.
        f = getattr(klass, name)
        doc = f.__doc__
        # a missing (or non-string) docstring renders as an empty one
        doc = doc if isinstance(doc, str) else ''
        if isinstance(f, types.MethodType):
            wrapper_template = method_wrapper_template
            decl, args = make_signature(f)
            # pass args by name to f because otherwise
            # GroupBy._make_wrapper won't know whether
            # we passed in an axis parameter.
            args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]]
            params = {'name': name,
                      'doc': doc,
                      'sig': ','.join(decl),
                      'self': args[0],
                      'args': ','.join(args_by_name)}
        else:
            wrapper_template = property_wrapper_template
            params = {'name': name, 'doc': doc}
        yield wrapper_template % params
Loading