pandas-dev · jorisvandenbossche · Jan 2, 2017 · Dec 27, 2016 · Dec 27, 2016 · Dec 27, 2016
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -5201,60 +5201,209 @@ def abs(self):
         """
         return np.abs(self)
 
-    _shared_docs['describe'] = """
-        Generate various summary statistics, excluding NaN values.
+    def describe(self, percentiles=None, include=None, exclude=None):
+        """
+        Generates descriptive statistics that summarize the central tendency,
+        dispersion and shape of a dataset's distribution, excluding
+        ``NaN`` values.
 
-        Parameters
-        ----------
-        percentiles : array-like, optional
-            The percentiles to include in the output. Should all
-            be in the interval [0, 1]. By default `percentiles` is
-            [.25, .5, .75], returning the 25th, 50th, and 75th percentiles.
-        include, exclude : list-like, 'all', or None (default)
-            Specify the form of the returned result. Either:
-
-            - None to both (default). The result will include only
-              numeric-typed columns or, if none are, only categorical columns.
-            - A list of dtypes or strings to be included/excluded.
-              To select all numeric types use numpy numpy.number. To select
-              categorical objects use type object. See also the select_dtypes
-              documentation. eg. df.describe(include=['O'])
-            - If include is the string 'all', the output column-set will
-              match the input one.
+        Analyzes both ``numeric`` and ``object`` series, as well
+        as `DataFrame` column sets of mixed data types.
 
-        Returns
-        -------
-        summary: %(klass)s of summary statistics
+        For ``numeric`` data, the result's index will include ``count``,
+        ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
+        upper percentiles. By default the lower percentile is ``25`` and the
+        upper percentile is ``75``. The ``50`` percentile is typically the
+        same as the median.
 
-        Notes
-        -----
-        The output DataFrame index depends on the requested dtypes:
+        For ``object`` data (e.g. strings or timestamps), the result's index
+        will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
+        is the most common value. The ``freq`` is the most common value's
+        frequency. Timestamps also include the ``first`` and ``last`` items.
 
-        For numeric dtypes, it will include: count, mean, std, min,
-        max, and lower, 50, and upper percentiles.
+        If multiple ``object`` values have the highest count, then the
+        ``top`` and ``freq`` results will be arbitrarily chosen from
+        among those with the highest count.
 
-        For object dtypes (e.g. timestamps or strings), the index
-        will include the count, unique, most common, and frequency of the
-        most common. Timestamps also include the first and last items.
+        For mixed data types provided via a `DataFrame`, the result will
+        include a union of attributes of each type.
 
-        For mixed dtypes, the index will be the union of the corresponding
-        output types. Non-applicable entries will be filled with NaN.
-        Note that mixed-dtype outputs can only be returned from mixed-dtype
-        inputs and appropriate use of the include/exclude arguments.
+        The `include` and `exclude` parameters can be used to limit
+        which columns in a `DataFrame` are analyzed for the output.
+        The parameters are ignored when analyzing a `Series`.
 
-        If multiple values have the highest count, then the
-        `count` and `most common` pair will be arbitrarily chosen from
-        among those with the highest count.
+        Parameters
+        ----------
+        percentiles : list of numbers, optional
+            The percentiles to include in the output. All should
+            fall between 0 and 1. The default is
+            ``[.25, .5, .75]``, which returns the 25th, 50th, and
+            75th percentiles.
+        include : None (default), 'all', or list of dtypes or strings, optional
+            A white list of data types to include in the result. Ignored
+            for `Series`. Here are the options:
+
+            - None (default). The result will include all ``numeric`` columns.
+            - 'all'. All columns on the input will be included in the output.
+            - A list of dtypes or strings. Limits the results to the
+              provided data types.
+              To limit the result to numeric types submit
+              ``numpy.number``. To limit it instead to categorical
+              objects submit the data type ``object``. Strings
+              can also be used in the style of
+              `select_dtypes` (e.g. df.describe(include=['O']))
+        exclude : None (default) or a list of dtypes or strings, optional
+            A black list of data types to omit from the result. Ignored
+            for `Series`. Here are the options:
+
+            - None (default). The result will exclude nothing.
+            - A list of dtypes or strings. Excludes the provided data types
+              from the result. To select numeric types submit
+              ``numpy.number``. To select categorical objects submit the data
+              type ``object``. Strings can also be used in the style of
+              `select_dtypes` (e.g. df.describe(exclude=['O']))
+
+        Returns
+        -------
+        summary : NDFrame of summary statistics
+
+        Examples
+        --------
+        Describing a numeric `Series`.
 
-        The include, exclude arguments are ignored for Series.
+        >>> import pandas as pd
+        >>> s = pd.Series([1, 2, 3])
+        >>> s.describe()
+        count    3.0
+        mean     2.0
+        std      1.0
+        min      1.0
+        25%      1.5
+        50%      2.0
+        75%      2.5
+        max      3.0
+        dtype: float64
+
+        Describing a categorical `Series`.
+
+        >>> s = pd.Series(['a', 'a', 'b', 'c'])
+        >>> s.describe()
+        count     4
+        unique    3
+        top       a
+        freq      2
+        dtype: object
+
+        Describing a timestamp `Series`.
+
+        >>> import numpy as np
+        >>> s = pd.Series([
+        ...   np.datetime64("2000-01-01"),
+        ...   np.datetime64("2010-01-01"),
+        ...   np.datetime64("2010-01-01")
+        ... ])
+        >>> s.describe()
+        count                       3
+        unique                      2
+        top       2010-01-01 00:00:00
+        freq                        2
+        first     2000-01-01 00:00:00
+        last      2010-01-01 00:00:00
+        dtype: object
+
+        Describing a `DataFrame`. By default only numeric fields are returned.
+
+        >>> df = pd.DataFrame(
+        ...   [[1, 'a'], [2, 'b'], [3, 'c']],
+        ...   columns=['numeric', 'object']
+        ... )
+        >>> df.describe()
+               numeric
+        count      3.0
+        mean       2.0
+        std        1.0
+        min        1.0
+        25%        1.5
+        50%        2.0
+        75%        2.5
+        max        3.0
+
+        Describing all columns of a `DataFrame` regardless of data type.
+
+        >>> df.describe(include='all')
+                numeric object
+        count       3.0      3
+        unique      NaN      3
+        top         NaN      b
+        freq        NaN      1
+        mean        2.0    NaN
+        std         1.0    NaN
+        min         1.0    NaN
+        25%         1.5    NaN
+        50%         2.0    NaN
+        75%         2.5    NaN
+        max         3.0    NaN
+
+        Describing a column from a `DataFrame` by accessing it as an attribute.
+
+        >>> df.numeric.describe()
+        count    3.0
+        mean     2.0
+        std      1.0
+        min      1.0
+        25%      1.5
+        50%      2.0
+        75%      2.5
+        max      3.0
+        Name: numeric, dtype: float64
+
+        Including only ``numeric`` columns in a `DataFrame` description.
+
+        >>> df.describe(include=[np.number])
+               numeric
+        count      3.0
+        mean       2.0
+        std        1.0
+        min        1.0
+        25%        1.5
+        50%        2.0
+        75%        2.5
+        max        3.0
+
+        Including only ``string`` columns in a `DataFrame` description.
+
+        >>> df.describe(include=[object])
+               object
+        count       3
+        unique      3
+        top         b
+        freq        1
+
+        Excluding ``numeric`` columns from a `DataFrame` description.
+
+        >>> df.describe(exclude=[np.number])
+               object
+        count       3
+        unique      3
+        top         b
+        freq        1
+
+        Excluding ``object`` columns from a `DataFrame` description.
+
+        >>> df.describe(exclude=[object])
+               numeric
+        count      3.0
+        mean       2.0
+        std        1.0
+        min        1.0
+        25%        1.5
+        50%        2.0
+        75%        2.5
+        max        3.0
 
         See Also
         --------
         DataFrame.select_dtypes
         """
-
-    @Appender(_shared_docs['describe'] % _shared_doc_kwargs)
-    def describe(self, percentiles=None, include=None, exclude=None):
         if self.ndim >= 3:
             msg = "describe is not implemented on Panel or PanelND objects."
             raise NotImplementedError(msg)