
Commit 20fda22

DOC: update the .agg doc-string with examples (pandas-dev#16188)

* DOC: update the .agg doc-string with examples
* various updates

1 parent: 2471967

File tree: 8 files changed, +443 / -179 lines

doc/source/whatsnew/v0.20.0.txt (+3, -3)

@@ -9,7 +9,7 @@ users upgrade to this version.
 
 Highlights include:
 
-- new ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here <whatsnew_0200.enhancements.agg>`
+- New ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here <whatsnew_0200.enhancements.agg>`
 - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here <io.feather>`.
 - The ``.ix`` indexer has been deprecated, see :ref:`here <whatsnew_0200.api_breaking.deprecate_ix>`
 - ``Panel`` has been deprecated, see :ref:`here <whatsnew_0200.api_breaking.deprecate_panel>`
@@ -45,8 +45,8 @@ New features
 ^^^^^^^^^^^
 
 Series & DataFrame have been enhanced to support the aggregation API. This is an already familiar API that
-is supported for groupby, window operations, and resampling. This allows one to express, possibly multiple,
-aggregation operations in a single concise way by using :meth:`~DataFrame.agg`,
+is supported for groupby, window operations, and resampling. This allows one to express aggregation operations
+in a single concise way by using :meth:`~DataFrame.agg`,
 and :meth:`~DataFrame.transform`. The full documentation is :ref:`here <basics.aggregate>` (:issue:`1623`).
 
 Here is a sample
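
To make the whatsnew entry above concrete (an editorial sketch, not part of the commit, using small made-up data), the new Series/DataFrame aggregation API looks roughly like this:

import pandas as pd

df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})

# A single function or string name reduces each column to one value.
df.agg('sum')                  # A -> 6, B -> 15

# A list of functions yields one row per aggregation.
df.agg(['sum', 'min'])         # rows 'sum' and 'min', columns A and B

# transform applies a function but keeps the original shape.
df.transform(lambda x: x - x.mean())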

pandas/core/base.py (+10, -37)

@@ -370,42 +370,6 @@ def _gotitem(self, key, ndim, subset=None):
 """
 raise AbstractMethodError(self)
 
-_agg_doc = """Aggregate using input function or dict of {column ->
-function}
-
-Parameters
-----------
-arg : function or dict
-    Function to use for aggregating groups. If a function, must either
-    work when passed a DataFrame or when passed to DataFrame.apply. If
-    passed a dict, the keys must be DataFrame column names.
-
-    Accepted Combinations are:
-      - string cythonized function name
-      - function
-      - list of functions
-      - dict of columns -> functions
-      - nested dict of names -> dicts of functions
-
-Notes
------
-Numpy functions mean/median/prod/sum/std/var are special cased so the
-default behavior is applying the function along axis=0
-(e.g., np.mean(arr_2d, axis=0)) as opposed to
-mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).
-
-Returns
--------
-aggregated : DataFrame
-"""
-
-_see_also_template = """
-See also
---------
-pandas.Series.%(name)s
-pandas.DataFrame.%(name)s
-"""
-
 def aggregate(self, func, *args, **kwargs):
     raise AbstractMethodError(self)
 
@@ -1150,30 +1114,39 @@ def factorize(self, sort=False, na_sentinel=-1):
 
 Examples
 --------
+
 >>> x = pd.Series([1, 2, 3])
 >>> x
 0    1
 1    2
 2    3
 dtype: int64
+
 >>> x.searchsorted(4)
 array([3])
+
 >>> x.searchsorted([0, 4])
 array([0, 3])
+
 >>> x.searchsorted([1, 3], side='left')
 array([0, 2])
+
 >>> x.searchsorted([1, 3], side='right')
 array([1, 3])
->>>
+
 >>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk' ])
 [apple, bread, bread, cheese, milk]
 Categories (4, object): [apple < bread < cheese < milk]
+
 >>> x.searchsorted('bread')
 array([1])  # Note: an array, not a scalar
+
 >>> x.searchsorted(['bread'])
 array([1])
+
 >>> x.searchsorted(['bread', 'eggs'])
 array([1, 4])
+
 >>> x.searchsorted(['bread', 'eggs'], side='right')
 array([3, 4])  # eggs before milk
 """)

pandas/core/frame.py (+39, -2)

@@ -18,6 +18,7 @@
 import sys
 import types
 import warnings
+from textwrap import dedent
 
 from numpy import nan as NA
 import numpy as np
@@ -4200,7 +4201,43 @@ def _gotitem(self, key, ndim, subset=None):
 # TODO: _shallow_copy(subset)?
 return self[key]
 
-@Appender(_shared_docs['aggregate'] % _shared_doc_kwargs)
+_agg_doc = dedent("""
+Examples
+--------
+
+>>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
+...                   index=pd.date_range('1/1/2000', periods=10))
+>>> df.iloc[3:7] = np.nan
+
+Aggregate these functions across all columns
+
+>>> df.agg(['sum', 'min'])
+            A         B         C
+sum -0.182253 -0.614014 -2.909534
+min -1.916563 -1.460076 -1.568297
+
+Different aggregations per column
+
+>>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
+            A         B
+max       NaN  1.514318
+min -1.916563 -1.460076
+sum -0.182253       NaN
+
+See also
+--------
+pandas.DataFrame.apply
+pandas.DataFrame.transform
+pandas.DataFrame.groupby.aggregate
+pandas.DataFrame.resample.aggregate
+pandas.DataFrame.rolling.aggregate
+
+""")
+
+@Appender(_agg_doc)
+@Appender(_shared_docs['aggregate'] % dict(
+    versionadded='.. versionadded:: 0.20.0',
+    **_shared_doc_kwargs))
 def aggregate(self, func, axis=0, *args, **kwargs):
     axis = self._get_axis_number(axis)
 
@@ -4272,7 +4309,7 @@ def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None,
 See also
 --------
 DataFrame.applymap: For elementwise operations
-DataFrame.agg: only perform aggregating type operations
+DataFrame.aggregate: only perform aggregating type operations
 DataFrame.transform: only perform transformating type operations
 
 Returns
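
The new decorator stack above composes the doc-string from a shared, parameterised template plus class-specific examples. A simplified sketch of that pattern follows (editorial, not part of the commit; the appender helper below is a hypothetical stand-in for the Appender decorator used in the diff, not pandas' actual implementation):

from textwrap import dedent

# A shared doc-string template; placeholders are filled per class.
_shared_docs = {}
_shared_docs['aggregate'] = """
Aggregate using callable, string, dict, or list of string/callables

%(versionadded)s

Returns
-------
aggregated : %(klass)s
"""

# Class-specific examples, written indented in the source and cleaned by dedent.
_agg_examples = dedent("""
    Examples
    --------
    >>> df.agg(['sum', 'min'])
    """)

def appender(text):
    # Hypothetical stand-in for the Appender decorator: tack text onto __doc__.
    def decorator(func):
        func.__doc__ = (func.__doc__ or '') + text
        return func
    return decorator

@appender(_agg_examples)                     # applied second: examples go last
@appender(_shared_docs['aggregate'] % dict(  # applied first: filled template
    versionadded='.. versionadded:: 0.20.0',
    klass='DataFrame'))
def aggregate(self, func, axis=0, *args, **kwargs):
    pass

print(aggregate.__doc__)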

pandas/core/generic.py (+32, -11)

@@ -2854,19 +2854,19 @@ def pipe(self, func, *args, **kwargs):
 return func(self, *args, **kwargs)
 
 _shared_docs['aggregate'] = ("""
-Aggregate using input function or dict of {column ->
-function}
+Aggregate using callable, string, dict, or list of string/callables
 
-.. versionadded:: 0.20.0
+%(versionadded)s
 
 Parameters
 ----------
 func : callable, string, dictionary, or list of string/callables
     Function to use for aggregating the data. If a function, must either
-    work when passed a DataFrame or when passed to DataFrame.apply. If
-    passed a dict, the keys must be DataFrame column names.
+    work when passed a %(klass)s or when passed to %(klass)s.apply. For
+    a DataFrame, can pass a dict, if the keys are DataFrame column names.
 
     Accepted Combinations are:
+
     - string function name
     - function
     - list of functions
@@ -2879,12 +2879,11 @@ def pipe(self, func, *args, **kwargs):
 (e.g., np.mean(arr_2d, axis=0)) as opposed to
 mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).
 
+agg is an alias for aggregate. Use it.
+
 Returns
 -------
 aggregated : %(klass)s
-
-See also
---------
 """)
 
 _shared_docs['transform'] = ("""
@@ -2899,18 +2898,40 @@ def pipe(self, func, *args, **kwargs):
 To apply to column
 
 Accepted Combinations are:
+
 - string function name
 - function
 - list of functions
 - dict of column names -> functions (or list of functions)
 
+Returns
+-------
+transformed : %(klass)s
+
 Examples
 --------
+>>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
+...                   index=pd.date_range('1/1/2000', periods=10))
+df.iloc[3:7] = np.nan
+
 >>> df.transform(lambda x: (x - x.mean()) / x.std())
+                   A         B         C
+2000-01-01  0.579457  1.236184  0.123424
+2000-01-02  0.370357 -0.605875 -1.231325
+2000-01-03  1.455756 -0.277446  0.288967
+2000-01-04       NaN       NaN       NaN
+2000-01-05       NaN       NaN       NaN
+2000-01-06       NaN       NaN       NaN
+2000-01-07       NaN       NaN       NaN
+2000-01-08 -0.498658  1.274522  1.642524
+2000-01-09 -0.540524 -1.012676 -0.828968
+2000-01-10 -1.366388 -0.614710  0.005378
+
+See also
+--------
+pandas.%(klass)s.aggregate
+pandas.%(klass)s.apply
 
-Returns
--------
-transformed : %(klass)s
 """)
 
 # ----------------------------------------------------------------------
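
Since the transform example added above uses random data, its output varies from run to run. Here is a small deterministic sketch (editorial, not part of the commit) of the contrast the doc-strings draw between transform and agg/aggregate:

import pandas as pd

df = pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': [10.0, 20.0, 30.0]})

# transform returns an object with the same shape and index...
print(df.transform(lambda x: x - x.mean()))   # 3 rows x 2 columns, centered values

# ...while agg reduces each column; agg is an alias for aggregate.
print(df.agg('mean'))         # A -> 2.0, B -> 20.0
print(df.aggregate('mean'))   # identical result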
