Skip to content

Commit 34816f3

Browse files
committed
DOC: update the .agg doc-string with examples
1 parent de87344 commit 34816f3

File tree

7 files changed

+382
-119
lines changed

7 files changed

+382
-119
lines changed

doc/source/whatsnew/v0.20.0.txt

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ users upgrade to this version.
99

1010
Highlights include:
1111

12-
- new ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here <whatsnew_0200.enhancements.agg>`
12+
- New ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample APIs, see :ref:`here <whatsnew_0200.enhancements.agg>`
1313
- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here <io.feather>`.
1414
- The ``.ix`` indexer has been deprecated, see :ref:`here <whatsnew_0200.api_breaking.deprecate_ix>`
1515
- ``Panel`` has been deprecated, see :ref:`here <whatsnew_0200.api_breaking.deprecate_panel>`
@@ -45,8 +45,8 @@ New features
4545
^^^^^^^^^^^
4646

4747
Series & DataFrame have been enhanced to support the aggregation API. This is an already familiar API that
48-
is supported for groupby, window operations, and resampling. This allows one to express, possibly multiple,
49-
aggregation operations in a single concise way by using :meth:`~DataFrame.agg`,
48+
is supported for groupby, window operations, and resampling. This allows one to express aggregation operations
49+
in a single concise way by using :meth:`~DataFrame.agg`,
5050
and :meth:`~DataFrame.transform`. The full documentation is :ref:`here <basics.aggregate>` (:issue:`1623`).
5151

5252
Here is a sample

pandas/core/base.py

+10-37
Original file line numberDiff line numberDiff line change
@@ -370,42 +370,6 @@ def _gotitem(self, key, ndim, subset=None):
370370
"""
371371
raise AbstractMethodError(self)
372372

373-
_agg_doc = """Aggregate using input function or dict of {column ->
374-
function}
375-
376-
Parameters
377-
----------
378-
arg : function or dict
379-
Function to use for aggregating groups. If a function, must either
380-
work when passed a DataFrame or when passed to DataFrame.apply. If
381-
passed a dict, the keys must be DataFrame column names.
382-
383-
Accepted Combinations are:
384-
- string cythonized function name
385-
- function
386-
- list of functions
387-
- dict of columns -> functions
388-
- nested dict of names -> dicts of functions
389-
390-
Notes
391-
-----
392-
Numpy functions mean/median/prod/sum/std/var are special cased so the
393-
default behavior is applying the function along axis=0
394-
(e.g., np.mean(arr_2d, axis=0)) as opposed to
395-
mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).
396-
397-
Returns
398-
-------
399-
aggregated : DataFrame
400-
"""
401-
402-
_see_also_template = """
403-
See also
404-
--------
405-
pandas.Series.%(name)s
406-
pandas.DataFrame.%(name)s
407-
"""
408-
409373
def aggregate(self, func, *args, **kwargs):
410374
raise AbstractMethodError(self)
411375

@@ -1150,30 +1114,39 @@ def factorize(self, sort=False, na_sentinel=-1):
11501114
11511115
Examples
11521116
--------
1117+
11531118
>>> x = pd.Series([1, 2, 3])
11541119
>>> x
11551120
0 1
11561121
1 2
11571122
2 3
11581123
dtype: int64
1124+
11591125
>>> x.searchsorted(4)
11601126
array([3])
1127+
11611128
>>> x.searchsorted([0, 4])
11621129
array([0, 3])
1130+
11631131
>>> x.searchsorted([1, 3], side='left')
11641132
array([0, 2])
1133+
11651134
>>> x.searchsorted([1, 3], side='right')
11661135
array([1, 3])
1167-
>>>
1136+
11681137
>>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ])
11691138
[apple, bread, bread, cheese, milk]
11701139
Categories (4, object): [apple < bread < cheese < milk]
1140+
11711141
>>> x.searchsorted('bread')
11721142
array([1]) # Note: an array, not a scalar
1143+
11731144
>>> x.searchsorted(['bread'])
11741145
array([1])
1146+
11751147
>>> x.searchsorted(['bread', 'eggs'])
11761148
array([1, 4])
1149+
11771150
>>> x.searchsorted(['bread', 'eggs'], side='right')
11781151
array([3, 4]) # eggs before milk
11791152
""")

pandas/core/frame.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import sys
1919
import types
2020
import warnings
21+
from textwrap import dedent
2122

2223
from numpy import nan as NA
2324
import numpy as np
@@ -4200,7 +4201,40 @@ def _gotitem(self, key, ndim, subset=None):
42004201
# TODO: _shallow_copy(subset)?
42014202
return self[key]
42024203

4203-
@Appender(_shared_docs['aggregate'] % _shared_doc_kwargs)
4204+
_agg_doc = dedent("""
4205+
Examples
4206+
--------
4207+
4208+
>>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
4209+
... index=pd.date_range('1/1/2000', periods=10))
4210+
>>> df.iloc[3:7] = np.nan
4211+
4212+
Aggregate these functions across all columns
4213+
4214+
>>> df.agg(['sum', 'min'])
4215+
A B C
4216+
sum -0.182253 -0.614014 -2.909534
4217+
min -1.916563 -1.460076 -1.568297
4218+
4219+
Different aggregations per column
4220+
4221+
>>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
4222+
A B
4223+
max NaN 1.514318
4224+
min -1.916563 -1.460076
4225+
sum -0.182253 NaN
4226+
4227+
See also
4228+
--------
4229+
pandas.DataFrame.apply
4230+
pandas.DataFrame.transform
4231+
4232+
""")
4233+
4234+
@Appender(_agg_doc)
4235+
@Appender(_shared_docs['aggregate'] % dict(
4236+
versionadded='.. versionadded:: 0.20.0',
4237+
**_shared_doc_kwargs))
42044238
def aggregate(self, func, axis=0, *args, **kwargs):
42054239
axis = self._get_axis_number(axis)
42064240

pandas/core/generic.py

+25-11
Original file line numberDiff line numberDiff line change
@@ -2879,19 +2879,19 @@ def pipe(self, func, *args, **kwargs):
28792879
return func(self, *args, **kwargs)
28802880

28812881
_shared_docs['aggregate'] = ("""
2882-
Aggregate using input function or dict of {column ->
2883-
function}
2882+
Aggregate using callable, string, dict, or list of string/callables
28842883
2885-
.. versionadded:: 0.20.0
2884+
%(versionadded)s
28862885
28872886
Parameters
28882887
----------
28892888
func : callable, string, dictionary, or list of string/callables
28902889
Function to use for aggregating the data. If a function, must either
2891-
work when passed a DataFrame or when passed to DataFrame.apply. If
2892-
passed a dict, the keys must be DataFrame column names.
2890+
work when passed a %(klass)s or when passed to %(klass)s.apply. For
2891+
a DataFrame, a dict can be passed if the keys are DataFrame column names.
28932892
28942893
Accepted Combinations are:
2894+
28952895
- string function name
28962896
- function
28972897
- list of functions
@@ -2907,9 +2907,6 @@ def pipe(self, func, *args, **kwargs):
29072907
Returns
29082908
-------
29092909
aggregated : %(klass)s
2910-
2911-
See also
2912-
--------
29132910
""")
29142911

29152912
_shared_docs['transform'] = ("""
@@ -2924,18 +2921,35 @@ def pipe(self, func, *args, **kwargs):
29242921
To apply to column
29252922
29262923
Accepted Combinations are:
2924+
29272925
- string function name
29282926
- function
29292927
- list of functions
29302928
- dict of column names -> functions (or list of functions)
29312929
2930+
Returns
2931+
-------
2932+
transformed : %(klass)s
2933+
29322934
Examples
29332935
--------
2936+
>>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
2937+
... index=pd.date_range('1/1/2000', periods=10))
2938+
>>> df.iloc[3:7] = np.nan
2939+
29342940
>>> df.transform(lambda x: (x - x.mean()) / x.std())
2941+
A B C
2942+
2000-01-01 0.579457 1.236184 0.123424
2943+
2000-01-02 0.370357 -0.605875 -1.231325
2944+
2000-01-03 1.455756 -0.277446 0.288967
2945+
2000-01-04 NaN NaN NaN
2946+
2000-01-05 NaN NaN NaN
2947+
2000-01-06 NaN NaN NaN
2948+
2000-01-07 NaN NaN NaN
2949+
2000-01-08 -0.498658 1.274522 1.642524
2950+
2000-01-09 -0.540524 -1.012676 -0.828968
2951+
2000-01-10 -1.366388 -0.614710 0.005378
29352952
2936-
Returns
2937-
-------
2938-
transformed : %(klass)s
29392953
""")
29402954

29412955
# ----------------------------------------------------------------------

0 commit comments

Comments
 (0)