Skip to content

Commit 45f9e57

Browse files
albertvillanova authored and TomAugspurger committed
DOC: update the aggregate docstring (#20276)
1 parent 51765d0 commit 45f9e57

File tree

6 files changed

+83
-45
lines changed

6 files changed

+83
-45
lines changed

pandas/core/frame.py

+49-22
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@
107107
_shared_doc_kwargs = dict(
108108
axes='index, columns', klass='DataFrame',
109109
axes_single_arg="{0 or 'index', 1 or 'columns'}",
110+
axis="""
111+
axis : {0 or 'index', 1 or 'columns'}, default 0
112+
- 0 or 'index': apply function to each column.
113+
- 1 or 'columns': apply function to each row.""",
110114
optional_by="""
111115
by : str or list of str
112116
Name or list of names to sort by.
@@ -4460,9 +4464,9 @@ def pivot(self, index=None, columns=None, values=None):
44604464
44614465
Reshape data (produce a "pivot" table) based on column values. Uses
44624466
unique values from specified `index` / `columns` to form axes of the
4463-
resulting DataFrame. This function does not support data aggregation,
4464-
multiple values will result in a MultiIndex in the columns. See the
4465-
:ref:`User Guide <reshaping>` for more on reshaping.
4467+
resulting DataFrame. This function does not support data
4468+
aggregation, multiple values will result in a MultiIndex in the
4469+
columns. See the :ref:`User Guide <reshaping>` for more on reshaping.
44664470
44674471
Parameters
44684472
----------
@@ -4980,36 +4984,59 @@ def _gotitem(self, key, ndim, subset=None):
49804984
return self[key]
49814985

49824986
_agg_doc = dedent("""
4987+
Notes
4988+
-----
4989+
The aggregation operations are always performed over an axis, either the
4990+
index (default) or the column axis. This behavior is different from
4991+
`numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`,
4992+
`var`), where the default is to compute the aggregation of the flattened
4993+
array, e.g., ``numpy.mean(arr_2d)`` as opposed to ``numpy.mean(arr_2d,
4994+
axis=0)``.
4995+
4996+
`agg` is an alias for `aggregate`. Use the alias.
4997+
49834998
Examples
49844999
--------
5000+
>>> df = pd.DataFrame([[1, 2, 3],
5001+
... [4, 5, 6],
5002+
... [7, 8, 9],
5003+
... [np.nan, np.nan, np.nan]],
5004+
... columns=['A', 'B', 'C'])
49855005
4986-
>>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
4987-
... index=pd.date_range('1/1/2000', periods=10))
4988-
>>> df.iloc[3:7] = np.nan
4989-
4990-
Aggregate these functions across all columns
5006+
Aggregate these functions over the rows.
49915007
49925008
>>> df.agg(['sum', 'min'])
4993-
A B C
4994-
sum -0.182253 -0.614014 -2.909534
4995-
min -1.916563 -1.460076 -1.568297
5009+
A B C
5010+
sum 12.0 15.0 18.0
5011+
min 1.0 2.0 3.0
49965012
4997-
Different aggregations per column
5013+
Different aggregations per column.
49985014
49995015
>>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
5000-
A B
5001-
max NaN 1.514318
5002-
min -1.916563 -1.460076
5003-
sum -0.182253 NaN
5016+
A B
5017+
max NaN 8.0
5018+
min 1.0 2.0
5019+
sum 12.0 NaN
5020+
5021+
Aggregate over the columns.
5022+
5023+
>>> df.agg("mean", axis="columns")
5024+
0 2.0
5025+
1 5.0
5026+
2 8.0
5027+
3 NaN
5028+
dtype: float64
50045029
50055030
See Also
50065031
--------
5007-
pandas.DataFrame.apply
5008-
pandas.DataFrame.transform
5009-
pandas.DataFrame.groupby.aggregate
5010-
pandas.DataFrame.resample.aggregate
5011-
pandas.DataFrame.rolling.aggregate
5012-
5032+
DataFrame.apply : Perform any type of operations.
5033+
DataFrame.transform : Perform transformation type operations.
5034+
pandas.core.groupby.GroupBy : Perform operations over groups.
5035+
pandas.core.resample.Resampler : Perform operations over resampled bins.
5036+
pandas.core.window.Rolling : Perform operations over rolling window.
5037+
pandas.core.window.Expanding : Perform operations over expanding window.
5038+
pandas.core.window.EWM : Perform operations over exponentially weighted
5039+
window.
50135040
""")
50145041

50155042
@Appender(_agg_doc)

pandas/core/generic.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -3937,36 +3937,37 @@ def pipe(self, func, *args, **kwargs):
39373937
return com._pipe(self, func, *args, **kwargs)
39383938

39393939
_shared_docs['aggregate'] = ("""
3940-
Aggregate using callable, string, dict, or list of string/callables
3940+
Aggregate using one or more operations over the specified axis.
39413941
39423942
%(versionadded)s
39433943
39443944
Parameters
39453945
----------
3946-
func : callable, string, dictionary, or list of string/callables
3946+
func : function, string, dictionary, or list of string/functions
39473947
Function to use for aggregating the data. If a function, must either
39483948
work when passed a %(klass)s or when passed to %(klass)s.apply. For
39493949
a DataFrame, can pass a dict, if the keys are DataFrame column names.
39503950
3951-
Accepted Combinations are:
3951+
Accepted combinations are:
39523952
3953-
- string function name
3954-
- function
3955-
- list of functions
3956-
- dict of column names -> functions (or list of functions)
3953+
- string function name.
3954+
- function.
3955+
- list of functions.
3956+
- dict of column names -> functions (or list of functions).
39573957
3958-
Notes
3959-
-----
3960-
Numpy functions mean/median/prod/sum/std/var are special cased so the
3961-
default behavior is applying the function along axis=0
3962-
(e.g., np.mean(arr_2d, axis=0)) as opposed to
3963-
mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).
3964-
3965-
`agg` is an alias for `aggregate`. Use the alias.
3958+
%(axis)s
3959+
*args
3960+
Positional arguments to pass to `func`.
3961+
**kwargs
3962+
Keyword arguments to pass to `func`.
39663963
39673964
Returns
39683965
-------
39693966
aggregated : %(klass)s
3967+
3968+
Notes
3969+
-----
3970+
`agg` is an alias for `aggregate`. Use the alias.
39703971
""")
39713972

39723973
_shared_docs['transform'] = ("""
@@ -4014,7 +4015,6 @@ def pipe(self, func, *args, **kwargs):
40144015
--------
40154016
pandas.%(klass)s.aggregate
40164017
pandas.%(klass)s.apply
4017-
40184018
""")
40194019

40204020
# ----------------------------------------------------------------------

pandas/core/groupby.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -3432,7 +3432,8 @@ def apply(self, func, *args, **kwargs):
34323432
@Appender(_agg_doc)
34333433
@Appender(_shared_docs['aggregate'] % dict(
34343434
klass='Series',
3435-
versionadded=''))
3435+
versionadded='',
3436+
axis=''))
34363437
def aggregate(self, func_or_funcs, *args, **kwargs):
34373438
_level = kwargs.pop('_level', None)
34383439
if isinstance(func_or_funcs, compat.string_types):
@@ -4611,7 +4612,8 @@ class DataFrameGroupBy(NDFrameGroupBy):
46114612
@Appender(_agg_doc)
46124613
@Appender(_shared_docs['aggregate'] % dict(
46134614
klass='DataFrame',
4614-
versionadded=''))
4615+
versionadded='',
4616+
axis=''))
46154617
def aggregate(self, arg, *args, **kwargs):
46164618
return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
46174619

pandas/core/resample.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,8 @@ def plot(self, *args, **kwargs):
334334
@Appender(_agg_doc)
335335
@Appender(_shared_docs['aggregate'] % dict(
336336
klass='DataFrame',
337-
versionadded=''))
337+
versionadded='',
338+
axis=''))
338339
def aggregate(self, arg, *args, **kwargs):
339340

340341
self._set_binner()

pandas/core/series.py

+4
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@
7777

7878
_shared_doc_kwargs = dict(
7979
axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
80+
axis="""
81+
axis : {0 or 'index'}
82+
Parameter needed for compatibility with DataFrame.
83+
""",
8084
inplace="""inplace : boolean, default False
8185
If True, performs operation inplace and returns None.""",
8286
unique='np.ndarray', duplicated='Series',

pandas/core/window.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,8 @@ def f(arg, *args, **kwargs):
626626
@Appender(_agg_doc)
627627
@Appender(_shared_docs['aggregate'] % dict(
628628
versionadded='',
629-
klass='Series/DataFrame'))
629+
klass='Series/DataFrame',
630+
axis=''))
630631
def aggregate(self, arg, *args, **kwargs):
631632
result, how = self._aggregate(arg, *args, **kwargs)
632633
if result is None:
@@ -1300,7 +1301,8 @@ def _validate_freq(self):
13001301
@Appender(_agg_doc)
13011302
@Appender(_shared_docs['aggregate'] % dict(
13021303
versionadded='',
1303-
klass='Series/DataFrame'))
1304+
klass='Series/DataFrame',
1305+
axis=''))
13041306
def aggregate(self, arg, *args, **kwargs):
13051307
return super(Rolling, self).aggregate(arg, *args, **kwargs)
13061308

@@ -1566,7 +1568,8 @@ def _get_window(self, other=None):
15661568
@Appender(_agg_doc)
15671569
@Appender(_shared_docs['aggregate'] % dict(
15681570
versionadded='',
1569-
klass='Series/DataFrame'))
1571+
klass='Series/DataFrame',
1572+
axis=''))
15701573
def aggregate(self, arg, *args, **kwargs):
15711574
return super(Expanding, self).aggregate(arg, *args, **kwargs)
15721575

@@ -1869,7 +1872,8 @@ def _constructor(self):
18691872
@Appender(_agg_doc)
18701873
@Appender(_shared_docs['aggregate'] % dict(
18711874
versionadded='',
1872-
klass='Series/DataFrame'))
1875+
klass='Series/DataFrame',
1876+
axis=''))
18731877
def aggregate(self, arg, *args, **kwargs):
18741878
return super(EWM, self).aggregate(arg, *args, **kwargs)
18751879

0 commit comments

Comments
 (0)