Skip to content

Commit 66b4a01

Browse files
committed
ENH: add Series & DataFrame .agg/.aggregate to provide convenient
function application that mimics the groupby(..).agg/.aggregate interface .apply is now a synonym for .agg, and will accept dict/list-likes for aggregations CLN: rename .name attr -> ._selection_name from SeriesGroupby for compat (didn't exist on DataFrameGroupBy) resolves conflicts w.r.t. setting .name on a groupby object closes #1623 closes #14464 custom .describe closes #14483 closes #7014
1 parent 3f523f3 commit 66b4a01

File tree

9 files changed

+538
-45
lines changed

9 files changed

+538
-45
lines changed

pandas/core/base.py

+57-13
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,9 @@ class SelectionMixin(object):
289289
}
290290

291291
@property
292-
def name(self):
292+
def _selection_name(self):
293+
""" return a name for myself; this would ideally be the 'name' property, but
294+
we cannot conflict with the Series.name property which can be set """
293295
if self._selection is None:
294296
return None # 'result'
295297
else:
@@ -404,6 +406,26 @@ def aggregate(self, func, *args, **kwargs):
404406

405407
agg = aggregate
406408

409+
def _try_aggregate_string_function(self, arg, *args, **kwargs):
    """
    Resolve the string ``arg`` to an operation and apply it.

    Resolution order:
    - an attribute of that name on ourselves; called with the supplied
      arguments when callable, returned as-is otherwise
    - a numpy function of that name, called with ourselves as the
      first argument
    - raise

    Parameters
    ----------
    arg : string
        name of the attribute / numpy function to look up
    *args, **kwargs
        forwarded to the resolved callable

    Returns
    -------
    the result of the resolved callable, or the attribute value itself
    when the attribute found on ourselves is not callable

    Raises
    ------
    ValueError
        if ``arg`` names neither an attribute of ours nor a numpy
        function, or if arguments are supplied for a non-callable
        attribute
    """
    assert isinstance(arg, compat.string_types)

    f = getattr(self, arg, None)
    if f is not None:
        if callable(f):
            return f(*args, **kwargs)

        # a plain attribute / property cannot take arguments, so calling
        # it would fail with an unrelated TypeError; hand the value back
        # instead. ``axis`` alone is tolerated because aggregate-style
        # callers may forward it unconditionally.
        extra = [k for k in kwargs if k != 'axis']
        if args or extra:
            raise ValueError("{} is not callable and does not accept "
                             "arguments".format(arg))
        return f

    f = getattr(np, arg, None)
    if f is not None:
        return f(self, *args, **kwargs)

    raise ValueError("{} is an unknown string function".format(arg))
428+
407429
def _aggregate(self, arg, *args, **kwargs):
408430
"""
409431
provide an implementation for the aggregators
@@ -427,14 +449,19 @@ def _aggregate(self, arg, *args, **kwargs):
427449
is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
428450
is_nested_renamer = False
429451

452+
_axis = kwargs.pop('_axis', None)
453+
if _axis is None:
454+
_axis = getattr(self, 'axis', 0)
430455
_level = kwargs.pop('_level', None)
456+
431457
if isinstance(arg, compat.string_types):
432-
return getattr(self, arg)(*args, **kwargs), None
458+
return self._try_aggregate_string_function(arg, *args,
459+
**kwargs), None
433460

434461
if isinstance(arg, dict):
435462

436463
# aggregate based on the passed dict
437-
if self.axis != 0: # pragma: no cover
464+
if _axis != 0: # pragma: no cover
438465
raise ValueError('Can only pass dict with axis=0')
439466

440467
obj = self._selected_obj
@@ -560,26 +587,33 @@ def _agg(arg, func):
560587
ABCDataFrame):
561588
result = concat([result[k] for k in keys], keys=keys, axis=1)
562589
else:
563-
from pandas import DataFrame
564-
result = DataFrame(result)
590+
from pandas import DataFrame, Series
591+
try:
592+
result = DataFrame(result)
593+
except ValueError:
594+
# we have a dict of scalars
595+
result = Series(result, name=self.name)
565596

566597
return result, True
567-
elif hasattr(arg, '__iter__'):
568-
return self._aggregate_multiple_funcs(arg, _level=_level), None
598+
elif is_list_like(arg) and arg not in compat.string_types:
599+
# we require a list, but not an 'str'
600+
return self._aggregate_multiple_funcs(arg,
601+
_level=_level,
602+
_axis=_axis), None
569603
else:
570604
result = None
571605

572-
cy_func = self._is_cython_func(arg)
573-
if cy_func and not args and not kwargs:
574-
return getattr(self, cy_func)(), None
606+
f = self._is_cython_func(arg)
607+
if f and not args and not kwargs:
608+
return getattr(self, f)(), None
575609

576610
# caller can react
577611
return result, True
578612

579-
def _aggregate_multiple_funcs(self, arg, _level):
613+
def _aggregate_multiple_funcs(self, arg, _level, _axis):
580614
from pandas.tools.merge import concat
581615

582-
if self.axis != 0:
616+
if _axis != 0:
583617
raise NotImplementedError("axis other than 0 is not supported")
584618

585619
if self._selected_obj.ndim == 1:
@@ -617,7 +651,17 @@ def _aggregate_multiple_funcs(self, arg, _level):
617651
except SpecificationError:
618652
raise
619653

620-
return concat(results, keys=keys, axis=1)
654+
try:
655+
return concat(results, keys=keys, axis=1)
656+
except TypeError:
657+
# shape change
658+
from pandas.types.cast import _is_nested_object
659+
from pandas import Series
660+
result = Series(results, index=keys, name=self.name)
661+
if _is_nested_object(result):
662+
raise ValueError("cannot combine transform and "
663+
"aggregation operations")
664+
return result
621665

622666
def _shallow_copy(self, obj=None, obj_type=None, **kwargs):
623667
""" return a new object with the replacement attributes """

pandas/core/frame.py

+43-4
Original file line numberDiff line numberDiff line change
@@ -4073,6 +4073,39 @@ def diff(self, periods=1, axis=0):
40734073
# ----------------------------------------------------------------------
40744074
# Function application
40754075

4076+
def _gotitem(self, key, ndim, subset=None):
    """
    sub-classes to define
    return a sliced object

    Parameters
    ----------
    key : string / list of selections
    ndim : 1,2
        requested ndim of result
    subset : object, default None
        subset to act on; when None the selection is taken from
        ourselves

    Returns
    -------
    the result of indexing the subset with ``key``, or the subset
    itself when it is already 1-dimensional
    """
    if subset is None:
        subset = self
    elif getattr(subset, 'ndim', None) == 1:
        # an already 1-dimensional subset is the sliced object; there is
        # no further column-wise selection to perform on it
        return subset

    # TODO: _shallow_copy(subset)?
    # slice the subset that was handed in rather than always ourselves,
    # otherwise the ``subset`` argument has no effect
    return subset[key]
4094+
4095+
@Appender(_shared_docs['aggregate'] % _shared_doc_kwargs)
def aggregate(self, func, axis=0, *args, **kwargs):
    axis = self._get_axis_number(axis)

    # TODO: flipped axis
    # the shared _aggregate machinery is only attempted along axis 0;
    # its second return value is not needed here
    if axis == 0:
        result, _ = self._aggregate(func, axis=0, *args, **kwargs)
        if result is not None:
            return result

    # nothing was produced above (or axis != 0): fall back to apply
    return self.apply(func, axis=axis, args=args, **kwargs)


agg = aggregate
4108+
40764109
def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None,
40774110
args=(), **kwds):
40784111
"""
@@ -4128,22 +4161,28 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None,
41284161
See also
41294162
--------
41304163
DataFrame.applymap: For elementwise operations
4164+
DataFrame.agg: only perform aggregating type operations
4165+
DataFrame.transform: only perform transforming type operations
41314166
41324167
Returns
41334168
-------
41344169
applied : Series or DataFrame
41354170
"""
41364171
axis = self._get_axis_number(axis)
4137-
if kwds or args and not isinstance(func, np.ufunc):
41384172

4173+
# dispatch to agg
4174+
if axis == 0 and isinstance(func, (list, dict)):
4175+
return self.aggregate(func, axis=axis, *args, **kwds)
4176+
4177+
if len(self.columns) == 0 and len(self.index) == 0:
4178+
return self._apply_empty_result(func, axis, reduce, *args, **kwds)
4179+
4180+
if kwds or args and not isinstance(func, np.ufunc):
41394181
def f(x):
41404182
return func(x, *args, **kwds)
41414183
else:
41424184
f = func
41434185

4144-
if len(self.columns) == 0 and len(self.index) == 0:
4145-
return self._apply_empty_result(func, axis, reduce, *args, **kwds)
4146-
41474186
if isinstance(f, np.ufunc):
41484187
with np.errstate(all='ignore'):
41494188
results = f(self.values)

pandas/core/generic.py

+78-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
SettingWithCopyError, SettingWithCopyWarning,
3434
AbstractMethodError)
3535

36-
from pandas.core.base import PandasObject
36+
from pandas.core.base import PandasObject, SelectionMixin
3737
from pandas.core.index import (Index, MultiIndex, _ensure_index,
3838
InvalidIndexError)
3939
import pandas.core.indexing as indexing
@@ -91,7 +91,7 @@ def _single_replace(self, to_replace, method, inplace, limit):
9191
return result
9292

9393

94-
class NDFrame(PandasObject):
94+
class NDFrame(PandasObject, SelectionMixin):
9595
"""
9696
N-dimensional analogue of DataFrame. Store multi-dimensional in a
9797
size-mutable, labeled data structure
@@ -428,6 +428,16 @@ def size(self):
428428
"""number of elements in the NDFrame"""
429429
return np.prod(self.shape)
430430

431+
@property
def _selected_obj(self):
    """
    SelectionMixin compatibility hook: the object that is operated on,
    which here is simply ourselves
    """
    return self
435+
436+
@property
def _obj_with_exclusions(self):
    """
    SelectionMixin compatibility hook: nothing is excluded here, so
    this is simply ourselves
    """
    return self
440+
431441
def _expand_axes(self, key):
432442
new_axes = []
433443
for k, ax in zip(key, self.axes):
@@ -2707,6 +2717,61 @@ def pipe(self, func, *args, **kwargs):
27072717
else:
27082718
return func(self, *args, **kwargs)
27092719

2720+
# Shared docstring template for ``aggregate``; consumers substitute
# ``%(klass)s`` before attaching the text to their method. The
# parameter is documented under the name the methods actually use
# (``func``).
_shared_docs['aggregate'] = ("""
Aggregate using input function or dict of {column ->
function}

.. versionadded:: 0.19.2

Parameters
----------
func : function or dict
    Function to use for aggregating the data. If a function, must either
    work when passed a DataFrame or when passed to DataFrame.apply. If
    passed a dict, the keys must be DataFrame column names.

    Accepted Combinations are:
        - string cythonized function name
        - function
        - list of functions
        - dict of columns -> functions
        - nested dict of names -> dicts of functions

Notes
-----
Numpy functions mean/median/prod/sum/std/var are special cased so the
default behavior is applying the function along axis=0
(e.g., np.mean(arr_2d, axis=0)) as opposed to
mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).

Returns
-------
aggregated : %(klass)s

See also
--------
""")
2754+
2755+
# Shared docstring template for ``transform``; consumers substitute
# ``%(klass)s`` before attaching the text to their method.
_shared_docs['transform'] = ("""
Call function producing a like-indexed %(klass)s
and return a %(klass)s with the transformed values

.. versionadded:: 0.19.2

Parameters
----------
func : function, callable or string
    To apply to column

Examples
--------
>>> grouped.transform(lambda x: (x - x.mean()) / x.std())

Returns
-------
transformed : %(klass)s
""")
2774+
27102775
# ----------------------------------------------------------------------
27112776
# Attribute access
27122777

@@ -5517,6 +5582,17 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None,
55175582

55185583
cls.ewm = ewm
55195584

5585+
@Appender(_shared_docs['transform'] % _shared_doc_kwargs)
def transform(self, func, *args, **kwargs):
    # run the function through agg, then reject anything that did not
    # keep the caller's length (a scalar or a shorter/longer result)
    result = self.agg(func, *args, **kwargs)
    if is_scalar(result) or len(result) != len(self):
        # trailing space keeps the two literals from fusing into
        # "produceaggregated"
        raise ValueError("transforms cannot produce "
                         "aggregated results")

    return result
5593+
5594+
cls.transform = transform
5595+
55205596

55215597
def _doc_parms(cls):
55225598
"""Return a tuple of the doc parms."""

0 commit comments

Comments
 (0)