DOC: update the aggregate docstring (#20276)

albertvillanova · TomAugspurger · commit 45f9e577680e · 2018-03-13T09:11:24.000-05:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -107,6 +107,10 @@
 _shared_doc_kwargs = dict(
     axes='index, columns', klass='DataFrame',
     axes_single_arg="{0 or 'index', 1 or 'columns'}",
+    axis="""
+    axis : {0 or 'index', 1 or 'columns'}, default 0
+        - 0 or 'index': apply function to each column.
+        - 1 or 'columns': apply function to each row.""",
     optional_by="""
         by : str or list of str
             Name or list of names to sort by.
@@ -4460,9 +4464,9 @@ def pivot(self, index=None, columns=None, values=None):
 
         Reshape data (produce a "pivot" table) based on column values. Uses
         unique values from specified `index` / `columns` to form axes of the
-        resulting DataFrame. This function does not support data aggregation,
-        multiple values will result in a MultiIndex in the columns. See the
-        :ref:`User Guide <reshaping>` for more on reshaping.
+        resulting DataFrame. This function does not support data
+        aggregation; multiple values will result in a MultiIndex in the
+        columns. See the :ref:`User Guide <reshaping>` for more on reshaping.
 
         Parameters
         ----------
@@ -4980,36 +4984,59 @@ def _gotitem(self, key, ndim, subset=None):
         return self[key]
 
     _agg_doc = dedent("""
+    Notes
+    -----
+    The aggregation operations are always performed over an axis, either the
+    index (default) or the column axis. This behavior is different from
+    `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`,
+    `var`), where the default is to compute the aggregation of the flattened
+    array, e.g., ``numpy.mean(arr_2d)`` as opposed to ``numpy.mean(arr_2d,
+    axis=0)``.
+
+    `agg` is an alias for `aggregate`. Use the alias.
+
     Examples
     --------
+    >>> df = pd.DataFrame([[1, 2, 3],
+    ...                    [4, 5, 6],
+    ...                    [7, 8, 9],
+    ...                    [np.nan, np.nan, np.nan]],
+    ...                   columns=['A', 'B', 'C'])
 
-    >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
-    ...                   index=pd.date_range('1/1/2000', periods=10))
-    >>> df.iloc[3:7] = np.nan
-
-    Aggregate these functions across all columns
+    Aggregate these functions over the rows.
 
     >>> df.agg(['sum', 'min'])
-                A         B         C
-    sum -0.182253 -0.614014 -2.909534
-    min -1.916563 -1.460076 -1.568297
+            A     B     C
+    sum  12.0  15.0  18.0
+    min   1.0   2.0   3.0
 
-    Different aggregations per column
+    Different aggregations per column.
 
     >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
-                A         B
-    max       NaN  1.514318
-    min -1.916563 -1.460076
-    sum -0.182253       NaN
+            A    B
+    max   NaN  8.0
+    min   1.0  2.0
+    sum  12.0  NaN
+
+    Aggregate over the columns.
+
+    >>> df.agg("mean", axis="columns")
+    0    2.0
+    1    5.0
+    2    8.0
+    3    NaN
+    dtype: float64
 
     See also
     --------
-    pandas.DataFrame.apply
-    pandas.DataFrame.transform
-    pandas.DataFrame.groupby.aggregate
-    pandas.DataFrame.resample.aggregate
-    pandas.DataFrame.rolling.aggregate
-
+    DataFrame.apply : Perform any type of operation.
+    DataFrame.transform : Perform transformation type operations.
+    pandas.core.groupby.GroupBy : Perform operations over groups.
+    pandas.core.resample.Resampler : Perform operations over resampled bins.
+    pandas.core.window.Rolling : Perform operations over rolling window.
+    pandas.core.window.Expanding : Perform operations over expanding window.
+    pandas.core.window.EWM : Perform operations over exponentially weighted
+        window.
     """)
 
     @Appender(_agg_doc)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3937,36 +3937,37 @@ def pipe(self, func, *args, **kwargs):
         return com._pipe(self, func, *args, **kwargs)
 
     _shared_docs['aggregate'] = ("""
-    Aggregate using callable, string, dict, or list of string/callables
+    Aggregate using one or more operations over the specified axis.
 
     %(versionadded)s
 
     Parameters
     ----------
-    func : callable, string, dictionary, or list of string/callables
+    func : function, string, dictionary, or list of string/functions
         Function to use for aggregating the data. If a function, must either
         work when passed a %(klass)s or when passed to %(klass)s.apply. For
         a DataFrame, can pass a dict, if the keys are DataFrame column names.
 
-        Accepted Combinations are:
+        Accepted combinations are:
 
-        - string function name
-        - function
-        - list of functions
-        - dict of column names -> functions (or list of functions)
+        - string function name.
+        - function.
+        - list of functions.
+        - dict of column names -> functions (or list of functions).
 
-    Notes
-    -----
-    Numpy functions mean/median/prod/sum/std/var are special cased so the
-    default behavior is applying the function along axis=0
-    (e.g., np.mean(arr_2d, axis=0)) as opposed to
-    mimicking the default Numpy behavior (e.g., np.mean(arr_2d)).
-
-    `agg` is an alias for `aggregate`. Use the alias.
+    %(axis)s
+    *args
+        Positional arguments to pass to `func`.
+    **kwargs
+        Keyword arguments to pass to `func`.
 
     Returns
     -------
     aggregated : %(klass)s
+
+    Notes
+    -----
+    `agg` is an alias for `aggregate`. Use the alias.
     """)
 
     _shared_docs['transform'] = ("""
@@ -4014,7 +4015,6 @@ def pipe(self, func, *args, **kwargs):
     --------
     pandas.%(klass)s.aggregate
     pandas.%(klass)s.apply
-
     """)
 
     # ----------------------------------------------------------------------
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -3432,7 +3432,8 @@ def apply(self, func, *args, **kwargs):
     @Appender(_agg_doc)
     @Appender(_shared_docs['aggregate'] % dict(
         klass='Series',
-        versionadded=''))
+        versionadded='',
+        axis=''))
     def aggregate(self, func_or_funcs, *args, **kwargs):
         _level = kwargs.pop('_level', None)
         if isinstance(func_or_funcs, compat.string_types):
@@ -4611,7 +4612,8 @@ class DataFrameGroupBy(NDFrameGroupBy):
     @Appender(_agg_doc)
     @Appender(_shared_docs['aggregate'] % dict(
         klass='DataFrame',
-        versionadded=''))
+        versionadded='',
+        axis=''))
     def aggregate(self, arg, *args, **kwargs):
         return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
 
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -334,7 +334,8 @@ def plot(self, *args, **kwargs):
     @Appender(_agg_doc)
     @Appender(_shared_docs['aggregate'] % dict(
         klass='DataFrame',
-        versionadded=''))
+        versionadded='',
+        axis=''))
     def aggregate(self, arg, *args, **kwargs):
 
         self._set_binner()
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -77,6 +77,10 @@
 
 _shared_doc_kwargs = dict(
     axes='index', klass='Series', axes_single_arg="{0 or 'index'}",
+    axis="""
+    axis : {0 or 'index'}
+        Parameter needed for compatibility with DataFrame.
+    """,
     inplace="""inplace : boolean, default False
         If True, performs operation inplace and returns None.""",
     unique='np.ndarray', duplicated='Series',
diff --git a/pandas/core/window.py b/pandas/core/window.py
@@ -626,7 +626,8 @@ def f(arg, *args, **kwargs):
     @Appender(_agg_doc)
     @Appender(_shared_docs['aggregate'] % dict(
         versionadded='',
-        klass='Series/DataFrame'))
+        klass='Series/DataFrame',
+        axis=''))
     def aggregate(self, arg, *args, **kwargs):
         result, how = self._aggregate(arg, *args, **kwargs)
         if result is None:
@@ -1300,7 +1301,8 @@ def _validate_freq(self):
     @Appender(_agg_doc)
     @Appender(_shared_docs['aggregate'] % dict(
         versionadded='',
-        klass='Series/DataFrame'))
+        klass='Series/DataFrame',
+        axis=''))
     def aggregate(self, arg, *args, **kwargs):
         return super(Rolling, self).aggregate(arg, *args, **kwargs)
 
@@ -1566,7 +1568,8 @@ def _get_window(self, other=None):
     @Appender(_agg_doc)
     @Appender(_shared_docs['aggregate'] % dict(
         versionadded='',
-        klass='Series/DataFrame'))
+        klass='Series/DataFrame',
+        axis=''))
     def aggregate(self, arg, *args, **kwargs):
         return super(Expanding, self).aggregate(arg, *args, **kwargs)
 
@@ -1869,7 +1872,8 @@ def _constructor(self):
     @Appender(_agg_doc)
     @Appender(_shared_docs['aggregate'] % dict(
         versionadded='',
-        klass='Series/DataFrame'))
+        klass='Series/DataFrame',
+        axis=''))
     def aggregate(self, arg, *args, **kwargs):
         return super(EWM, self).aggregate(arg, *args, **kwargs)