From ead477b7c0588399aa1656feae892115472871f2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 5 Aug 2017 21:20:15 -0700 Subject: [PATCH 1/6] Define boxplot inside DataFrameGroupBy definition instead of pinning it on right after the definition; move the import of boxplot_frame_groupby to the top of the module. Start getting rid of the exec(foo) statements. In particular, the ones that define properties. Still need to figure out the ones defining methods. --- pandas/core/groupby.py | 66 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a388892e925b6..888c88e910782 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,5 +1,5 @@ import types -from functools import wraps +from functools import wraps, partial import numpy as np import datetime import collections @@ -63,6 +63,8 @@ import pandas.core.common as com from pandas.core.config import option_context +from pandas.plotting._core import boxplot_frame_groupby + from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT from pandas._libs.lib import count_level_2d @@ -950,7 +952,6 @@ def _apply_filter(self, indices, dropna): class GroupBy(_GroupBy): - """ Class for grouping and aggregating relational data. See aggregate, transform, and apply functions on this object. @@ -2742,6 +2743,17 @@ def _convert_grouper(axis, grouper): return grouper +def pin_method(cls, target_cls, name): + items = list(_whitelist_method_generator(target_cls, [name])) + assert len(items) <= 1, items + if items: + func = items[0] + if not isinstance(func, str): + # Note: checking `if callable(func)` fails here on `dtype`. + setattr(cls, name, func) + return + + def _whitelist_method_generator(klass, whitelist): """ Yields all GroupBy member defs for DataFrame/Series names in _whitelist. @@ -2762,20 +2774,12 @@ def _whitelist_method_generator(klass, whitelist): base class, any such name is skipped. 
""" - method_wrapper_template = \ - """def %(name)s(%(sig)s) : + method_wrapper_template = """def %(name)s(%(sig)s): \""" %(doc)s \""" f = %(self)s.__getattr__('%(name)s') return f(%(args)s)""" - property_wrapper_template = \ - """@property -def %(name)s(self) : - \""" - %(doc)s - \""" - return self.__getattr__('%(name)s')""" for name in whitelist: # don't override anything that was explicitly defined # in the base class @@ -2794,22 +2798,31 @@ def %(name)s(self) : args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]] params = {'name': name, 'doc': doc, - 'sig': ','.join(decl), + 'sig': ', '.join(decl), 'self': args[0], - 'args': ','.join(args_by_name)} + 'args': ', '.join(args_by_name)} + + yield wrapper_template % params + else: - wrapper_template = property_wrapper_template - params = {'name': name, 'doc': doc} - yield wrapper_template % params + + def getter(self): + return self.__getattr__(name) + + getter.__name__ = name + getter.__doc__ = doc + + prop = property(getter, doc=doc) + yield prop class SeriesGroupBy(GroupBy): # # Make class defs of attributes on SeriesGroupBy whitelist _apply_whitelist = _series_apply_whitelist - for _def_str in _whitelist_method_generator(Series, - _series_apply_whitelist): - exec(_def_str) + for _def_str in _whitelist_method_generator(Series, _apply_whitelist): + if isinstance(_def_str, str): + exec(_def_str) @property def _selection_name(self): @@ -3189,7 +3202,6 @@ def describe(self, **kwargs): def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): - from functools import partial from pandas.core.reshape.tile import cut from pandas.core.reshape.merge import _get_join_indexers @@ -3330,6 +3342,10 @@ def _apply_to_column_groupbys(self, func): return func(self) +for name in SeriesGroupBy._apply_whitelist: + pin_method(SeriesGroupBy, Series, name) + + class NDFrameGroupBy(GroupBy): def _iterate_slices(self): @@ -3967,7 +3983,8 @@ class DataFrameGroupBy(NDFrameGroupBy): # # Make class 
defs of attributes on DataFrameGroupBy whitelist. for _def_str in _whitelist_method_generator(DataFrame, _apply_whitelist): - exec(_def_str) + if isinstance(_def_str, str): + exec(_def_str) _block_agg_axis = 1 @@ -4203,7 +4220,6 @@ def _apply_to_column_groupbys(self, func): def count(self): """ Compute count of group, excluding missing values """ - from functools import partial from pandas.core.dtypes.missing import _isna_ndarraylike as isna data, _ = self._get_data_to_aggregate() @@ -4283,9 +4299,11 @@ def groupby_series(obj, col=None): results.index = _default_index(len(results)) return results + boxplot = boxplot_frame_groupby + -from pandas.plotting._core import boxplot_frame_groupby # noqa -DataFrameGroupBy.boxplot = boxplot_frame_groupby +for name in DataFrameGroupBy._apply_whitelist: + pin_method(DataFrameGroupBy, DataFrame, name) class PanelGroupBy(NDFrameGroupBy): From 1d59eff63c7bbb01de9d628f28b476d965831be4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 10 Aug 2017 11:09:52 -0700 Subject: [PATCH 2/6] Don't let pin_method overwrite manually-defined methods/attributes --- pandas/core/groupby.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 888c88e910782..2b9c58887b3a6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2744,13 +2744,15 @@ def _convert_grouper(axis, grouper): def pin_method(cls, target_cls, name): - items = list(_whitelist_method_generator(target_cls, [name])) - assert len(items) <= 1, items - if items: - func = items[0] - if not isinstance(func, str): - # Note: checking `if callable(func)` fails here on `dtype`. 
- setattr(cls, name, func) + if not hasattr(cls, name): + # Avoid over-writing `DataFrameGroupBy.boxplot` + items = list(_whitelist_method_generator(target_cls, [name])) + assert len(items) <= 1, items + if items: + func = items[0] + if not isinstance(func, str): + # Note: checking `if callable(func)` fails here on `dtype`. + setattr(cls, name, func) return @@ -4306,6 +4308,7 @@ def groupby_series(obj, col=None): pin_method(DataFrameGroupBy, DataFrame, name) + class PanelGroupBy(NDFrameGroupBy): def aggregate(self, arg, *args, **kwargs): From 3627aaf976aeffa66f7f6e95b064062522920acd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 10 Aug 2017 15:53:44 -0700 Subject: [PATCH 3/6] flake8 fix whitespace --- pandas/core/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2b9c58887b3a6..209601b78f30e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -4308,7 +4308,6 @@ def groupby_series(obj, col=None): pin_method(DataFrameGroupBy, DataFrame, name) - class PanelGroupBy(NDFrameGroupBy): def aggregate(self, arg, *args, **kwargs): From b639c4da9f64cde67ea0abeaa3c1150e40ac7a18 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 11 Aug 2017 14:37:06 -0700 Subject: [PATCH 4/6] Explicitly write out methods instead of generating them with exec --- pandas/core/groupby.py | 321 +++++++++++++++++++++++++++++------------ 1 file changed, 226 insertions(+), 95 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 209601b78f30e..2ae5a45f368d8 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2554,8 +2554,7 @@ def groups(self): self.group_index)) -def _get_grouper(obj, key=None, axis=0, level=None, sort=True, - mutated=False): +def _get_grouper(obj, key=None, axis=0, level=None, sort=True, mutated=False): """ create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. 
@@ -2743,88 +2742,124 @@ def _convert_grouper(axis, grouper): return grouper -def pin_method(cls, target_cls, name): - if not hasattr(cls, name): - # Avoid over-writing `DataFrameGroupBy.boxplot` - items = list(_whitelist_method_generator(target_cls, [name])) - assert len(items) <= 1, items - if items: - func = items[0] - if not isinstance(func, str): - # Note: checking `if callable(func)` fails here on `dtype`. - setattr(cls, name, func) - return - - -def _whitelist_method_generator(klass, whitelist): - """ - Yields all GroupBy member defs for DataFrame/Series names in _whitelist. - - Parameters - ---------- - klass - class where members are defined. Should be Series or DataFrame - - whitelist - list of names of klass methods to be constructed - - Returns - ------- - The generator yields a sequence of strings, each suitable for exec'ing, - that define implementations of the named methods for DataFrameGroupBy - or SeriesGroupBy. - - Since we don't want to override methods explicitly defined in the - base class, any such name is skipped. - """ - - method_wrapper_template = """def %(name)s(%(sig)s): - \""" - %(doc)s - \""" - f = %(self)s.__getattr__('%(name)s') - return f(%(args)s)""" - for name in whitelist: - # don't override anything that was explicitly defined - # in the base class - if hasattr(GroupBy, name): - continue - # ugly, but we need the name string itself in the method. - f = getattr(klass, name) - doc = f.__doc__ - doc = doc if type(doc) == str else '' - if isinstance(f, types.MethodType): - wrapper_template = method_wrapper_template - decl, args = make_signature(f) - # pass args by name to f because otherwise - # GroupBy._make_wrapper won't know whether - # we passed in an axis parameter. 
- args_by_name = ['{0}={0}'.format(arg) for arg in args[1:]] - params = {'name': name, - 'doc': doc, - 'sig': ', '.join(decl), - 'self': args[0], - 'args': ', '.join(args_by_name)} - - yield wrapper_template % params - - else: - - def getter(self): - return self.__getattr__(name) - - getter.__name__ = name - getter.__doc__ = doc - - prop = property(getter, doc=doc) - yield prop - - +# TODO: *args/**kwargs get ignored in many of the methods +# inherited from Series/DataFrame. Should they be used, dropped ...? class SeriesGroupBy(GroupBy): - # - # Make class defs of attributes on SeriesGroupBy whitelist _apply_whitelist = _series_apply_whitelist - for _def_str in _whitelist_method_generator(Series, _apply_whitelist): - if isinstance(_def_str, str): - exec(_def_str) + + @Appender(Series.all.__doc__) + def all(self, axis=None, bool_only=None, skipna=None, level=None, + **kwargs): + f = self.__getattr__('all') + return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) + + @property + def dtype(self): + """ + return the dtype object of the underlying data + """ + return self.__getattr__('dtype') + + @Appender(Series.idxmax.__doc__) + def idxmax(self, axis=None, skipna=True, *args, **kwargs): + f = self.__getattr__('idxmax') + return f(axis=axis, skipna=skipna) + + @Appender(Series.rank.__doc__) + def rank(self, axis=0, method='average', numeric_only=None, + na_option='keep', ascending=True, pct=False): + f = self.__getattr__('rank') + return f(axis=axis, method=method, numeric_only=numeric_only, + na_option=na_option, ascending=ascending, pct=pct) + + @Appender(Series.diff.__doc__) + def diff(self, periods=1): + f = self.__getattr__('diff') + return f(periods=periods) + + @Appender(Series.any.__doc__) + def any(self, axis=None, bool_only=None, skipna=None, level=None, + **kwargs): + f = self.__getattr__('any') + return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) + + @Appender(Series.nsmallest.__doc__) + def nsmallest(self, n=5, 
keep='first'): + f = self.__getattr__('nsmallest') + return f(n=n, keep=keep) + + @Appender(Series.quantile.__doc__) + def quantile(self, q=0.5, interpolation='linear'): + f = self.__getattr__('quantile') + return f(q=q, interpolation=interpolation) + + @Appender(Series.hist.__doc__) + def hist(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, + ylabelsize=None, yrot=None, figsize=None, bins=10, **kwds): + f = self.__getattr__('hist') + return f(by=by, ax=ax, grid=grid, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, figsize=figsize, bins=bins) + + @Appender(Series.take.__doc__) + def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): + f = self.__getattr__('take') + return f(indices=indices, axis=axis, convert=convert, is_copy=is_copy) + + @Appender(Series.mad.__doc__) + def mad(self, axis=None, skipna=None, level=None): + f = self.__getattr__('mad') + return f(axis=axis, skipna=skipna, level=level) + + @Appender(Series.corr.__doc__) + def corr(self, other, method='pearson', min_periods=None): + f = self.__getattr__('corr') + return f(other=other, method=method, min_periods=min_periods) + + @Appender(Series.fillna.__doc__) + def fillna(self, value=None, method=None, axis=None, inplace=False, + limit=None, downcast=None, **kwargs): + f = self.__getattr__('fillna') + return f(value=value, method=method, axis=axis, inplace=inplace, + limit=limit, downcast=downcast) + + @Appender(Series.unique.__doc__) + def unique(self): + f = self.__getattr__('unique') + return f() + + @Appender(Series.idxmin.__doc__) + def idxmin(self, axis=None, skipna=True, *args, **kwargs): + f = self.__getattr__('idxmin') + return f(axis=axis, skipna=skipna) + + @Appender(Series.cov.__doc__) + def cov(self, other, min_periods=None): + f = self.__getattr__('cov') + return f(other=other, min_periods=min_periods) + + @Appender(Series.tshift.__doc__) + def tshift(self, periods=1, freq=None, axis=0): + f = self.__getattr__('tshift') + return 
f(periods=periods, freq=freq,axis=axis) + + @Appender(Series.pct_change.__doc__) + def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, + **kwargs): + f = self.__getattr__('pct_change') + return f(periods=periods, fill_method=fill_method, limit=limit, + freq=freq) + + @Appender(Series.skew.__doc__) + def skew(self, axis=None, skipna=None, level=None, numeric_only=None, + **kwargs): + f = self.__getattr__('skew') + return f(axis=axis, skipna=skipna, level=level, + numeric_only=numeric_only) + + @Appender(Series.nlargest.__doc__) + def nlargest(self, n=5, keep='first'): + f = self.__getattr__('nlargest') + return f(n=n, keep=keep) @property def _selection_name(self): @@ -3344,9 +3379,6 @@ def _apply_to_column_groupbys(self, func): return func(self) -for name in SeriesGroupBy._apply_whitelist: - pin_method(SeriesGroupBy, Series, name) - class NDFrameGroupBy(GroupBy): @@ -3982,14 +4014,117 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa class DataFrameGroupBy(NDFrameGroupBy): _apply_whitelist = _dataframe_apply_whitelist - # - # Make class defs of attributes on DataFrameGroupBy whitelist. 
- for _def_str in _whitelist_method_generator(DataFrame, _apply_whitelist): - if isinstance(_def_str, str): - exec(_def_str) - _block_agg_axis = 1 + @Appender(DataFrame.all.__doc__) + def all(self, axis=None, bool_only=None, skipna=None, level=None, + **kwargs): + f = self.__getattr__('all') + return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) + + @Appender(DataFrame.idxmax.__doc__) + def idxmax(self, axis=0, skipna=True): + f = self.__getattr__('idxmax') + return f(axis=axis, skipna=skipna) + + @Appender(DataFrame.rank.__doc__) + def rank(self, axis=0, method='average', numeric_only=None, + na_option='keep', ascending=True, pct=False): + f = self.__getattr__('rank') + return f(axis=axis, method=method, numeric_only=numeric_only, + na_option=na_option, ascending=ascending, pct=pct) + + @Appender(DataFrame.diff.__doc__) + def diff(self, periods=1, axis=0): + f = self.__getattr__('diff') + return f(periods=periods, axis=axis) + + @Appender(DataFrame.any.__doc__) + def any(self, axis=None, bool_only=None, skipna=None, level=None, + **kwargs): + f = self.__getattr__('any') + return f(axis=axis, bool_only=bool_only, skipna=skipna, level=level) + + @Appender(DataFrame.quantile.__doc__) + def quantile(self, q=0.5, axis=0, numeric_only=True, + interpolation='linear'): + f = self.__getattr__('quantile') + return f(q=q, axis=axis, numeric_only=numeric_only, + interpolation=interpolation) + + @Appender(DataFrame.hist.__doc__) + def hist(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, + ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, + figsize=None, layout=None, bins=10, **kwds): + f = data.__getattr__('hist') + return f(column=column, by=by, grid=grid, xlabelsize=xlabelsize, + xrot=xrot, ylabelsize=ylabelsize, yrot=yrot, ax=ax, + sharex=sharex, sharey=sharey, figsize=figsize, layout=layout, + bins=bins) + + @Appender(DataFrame.take.__doc__) + def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): + f = 
self.__getattr__('take') + return f(indices=indices, axis=axis, convert=convert, is_copy=is_copy) + + @property + def dtypes(self): + """ + Return the dtypes in this object. + """ + return self.__getattr__('dtypes') + + @Appender(DataFrame.mad.__doc__) + def mad(self, axis=None, skipna=None, level=None): + f = self.__getattr__('mad') + return f(axis=axis, skipna=skipna, level=level) + + @Appender(DataFrame.corr.__doc__) + def corr(self, method='pearson', min_periods=1): + f = self.__getattr__('corr') + return f(method=method, min_periods=min_periods) + + @Appender(DataFrame.fillna.__doc__) + def fillna(self, value=None, method=None, axis=None, inplace=False, + limit=None, downcast=None, **kwargs): + f = self.__getattr__('fillna') + return f(value=value, method=method, axis=axis, inplace=inplace, + limit=limit, downcast=downcast) + + @Appender(DataFrame.idxmin.__doc__) + def idxmin(self, axis=0, skipna=True): + f = self.__getattr__('idxmin') + return f(axis=axis, skipna=skipna) + + @Appender(DataFrame.cov.__doc__) + def cov(self, min_periods=None): + f = self.__getattr__('cov') + return f(min_periods=min_periods) + + @Appender(DataFrame.tshift.__doc__) + def tshift(self, periods=1, freq=None, axis=0): + f = self.__getattr__('tshift') + return f(periods=periods, freq=freq, axis=axis) + + @Appender(DataFrame.pct_change.__doc__) + def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, + **kwargs): + f = self.__getattr__('pct_change') + return f(periods=periods, fill_method=fill_method, limit=limit, + freq=freq) + + @Appender(DataFrame.skew.__doc__) + def skew(self, axis=None, skipna=None, level=None, numeric_only=None, + **kwargs): + f = self.__getattr__('skew') + return f(axis=axis, skipna=skipna, level=level, + numeric_only=numeric_only) + + @Appender(DataFrame.corrwith.__doc__) + def corrwith(self, other, axis=0, drop=False): + f = self.__getattr__('corrwith') + return f(other=other, axis=axis, drop=drop) + _agg_doc = dedent(""" Examples -------- 
@@ -4304,10 +4439,6 @@ def groupby_series(obj, col=None): boxplot = boxplot_frame_groupby -for name in DataFrameGroupBy._apply_whitelist: - pin_method(DataFrameGroupBy, DataFrame, name) - - class PanelGroupBy(NDFrameGroupBy): def aggregate(self, arg, *args, **kwargs): From 58e00a7157614c87c63daf1abe02d5ac6478d38b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 11 Aug 2017 15:02:52 -0700 Subject: [PATCH 5/6] Flake8 fix missing whitespace --- pandas/core/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2ae5a45f368d8..d21950223d38b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2840,7 +2840,7 @@ def cov(self, other, min_periods=None): @Appender(Series.tshift.__doc__) def tshift(self, periods=1, freq=None, axis=0): f = self.__getattr__('tshift') - return f(periods=periods, freq=freq,axis=axis) + return f(periods=periods, freq=freq, axis=axis) @Appender(Series.pct_change.__doc__) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, From c3c0e0a6e59f5d919351a86999287434b34cbe4c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 11 Aug 2017 18:17:38 -0700 Subject: [PATCH 6/6] Flake8 fix whitespace and unused import --- pandas/core/groupby.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index d21950223d38b..1fb5b73d10c3c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -54,8 +54,7 @@ from pandas.core.sorting import (get_group_index_sorter, get_group_index, compress_group_index, get_flattened_iterator, decons_obs_group_ids, get_indexer_dict) -from pandas.util._decorators import (cache_readonly, Substitution, - Appender, make_signature) +from pandas.util._decorators import cache_readonly, Substitution, Appender from pandas.io.formats.printing import pprint_thing from pandas.util._validators import validate_kwargs @@ -3379,7 +3378,6 @@ def 
_apply_to_column_groupbys(self, func): return func(self) - class NDFrameGroupBy(GroupBy): def _iterate_slices(self):