From 1d6aa0e8dc9f4455d1a6edd5c1be43ad7f13cc80 Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Mon, 26 Dec 2016 20:43:18 -0800
Subject: [PATCH 1/9] DOC: Clarified and expanded describe documentation

---
 pandas/core/generic.py | 230 +++++++++++++++++++++++++++++++++--------
 1 file changed, 189 insertions(+), 41 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 77c2699f5a432..adcb73acff2fe 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5201,60 +5201,208 @@ def abs(self):
         """
         return np.abs(self)
 
-    _shared_docs['describe'] = """
-        Generate various summary statistics, excluding NaN values.
+    def describe(self, percentiles=None, include=None, exclude=None):
+        """
+        Generates descriptive statistics that summarize the central tendency,
+        dispersion and shape of a dataset's distribution, excluding ``NaN`` values.
 
-        Parameters
-        ----------
-        percentiles : array-like, optional
-            The percentiles to include in the output. Should all
-            be in the interval [0, 1]. By default `percentiles` is
-            [.25, .5, .75], returning the 25th, 50th, and 75th percentiles.
-        include, exclude : list-like, 'all', or None (default)
-            Specify the form of the returned result. Either:
-
-            - None to both (default). The result will include only
-              numeric-typed columns or, if none are, only categorical columns.
-            - A list of dtypes or strings to be included/excluded.
-              To select all numeric types use numpy numpy.number. To select
-              categorical objects use type object. See also the select_dtypes
-              documentation. eg. df.describe(include=['O'])
-            - If include is the string 'all', the output column-set will
-              match the input one.
+        Analyzes both ``numeric`` and ``object`` series, as well
+        as `DataFrame` column sets of mixed data types.
 
-        Returns
-        -------
-        summary: %(klass)s of summary statistics
+        For ``numeric`` data, the result's index will include ``count``,
+        ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
+        upper percentiles. By default the lower percentile is `25` and the
+        upper percentile is ``75``. The `50` percentile is typically the
+        same as the median.
 
-        Notes
-        -----
-        The output DataFrame index depends on the requested dtypes:
+        For ``object`` data (e.g. strings or timestamps), the result's index
+        will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
+        is the most common value. The ``freq`` is the most common value's
+        frequency. Timestamps also include the ``first`` and ``last`` items.
 
-        For numeric dtypes, it will include: count, mean, std, min,
-        max, and lower, 50, and upper percentiles.
+        If multiple ``object`` values have the highest count, then the
+        ``count`` and ``top`` results will be arbitrarily chosen from
+        among those with the highest count.
 
-        For object dtypes (e.g. timestamps or strings), the index
-        will include the count, unique, most common, and frequency of the
-        most common. Timestamps also include the first and last items.
+        For mixed data types provided via a `DataFrame`, the result will
+        include a union of attributes of each type.
 
-        For mixed dtypes, the index will be the union of the corresponding
-        output types. Non-applicable entries will be filled with NaN.
-        Note that mixed-dtype outputs can only be returned from mixed-dtype
-        inputs and appropriate use of the include/exclude arguments.
+        The `include` and `exclude` parameters can be used to limit
+        which columns in a `DataFrame` are analyzed for the output.
+        The parameters are ignored when analyzing a `Series`.
 
-        If multiple values have the highest count, then the
-        `count` and `most common` pair will be arbitrarily chosen from
-        among those with the highest count.
+        Parameters
+        ----------
+        percentiles : list of numbers, optional
+            The percentiles to include in the output. All should
+            fall between 0 and 1. The default is
+            ``[.25, .5, .75]``, which returns the 25th, 50th, and
+            75th percentiles.
+        include : None (default), 'all', or list of dtypes or strings, optional
+            A white list of data types to include in the result. Ignored
+            for `Series`. Here are the options:
+
+            - None (default). The result will include all ``numeric`` columns.
+            - 'all'. All columns on the input will be included in the output.
+            - A list of dtypes or strings. Limits the results to the
+              provided data types.
+              To limit the result to numeric types submit
+              ``numpy.number``. To limit it instead to categorical
+              objects submit the data type ``object``. Strings
+              can also be used in the style of
+              `select_dtypes` (e.g. df.describe(include=['O']))
+        exclude : None (default) or a list of dtypes or strings, optional,
+            A black list of data types to omit from the result. Ignored
+            for `Series`. Here are the options:
+
+            - None (default). The result will exclude nothing.
+            - A list of dtypes or strings. Excludes the provided data types
+              from the result. To select numeric types submit
+              ``numpy.number``. To select categorical objects submut the data
+              type ``object``. Strings can also be used in the style of
+              `select_dtypes` (e.g. df.describe(include=['O']))
+
+        Returns
+        -------
+        summary: NDFrame of summary statistics
+
+        Examples
+        --------
+        Describing a numeric `Series`.
 
-        The include, exclude arguments are ignored for Series.
+        >>> import pandas as pd
+        >>> s = pd.Series([1, 2, 3])
+        >>> s.describe()
+        count    3.0
+        mean     2.0
+        std      1.0
+        min      1.0
+        25%      1.5
+        50%      2.0
+        75%      2.5
+        max      3.0
+
+        Describing a categorical `Series`.
+
+        >>> s = pd.Series(['a', 'a', 'b', 'c'])
+        >>> s.describe()
+        count     4
+        unique    3
+        top       a
+        freq      2
+        dtype: object
+
+        Describing a timestamp `Series`.
+
+        >>> import numpy as np
+        >>> s = pd.Series([
+        ..    np.datetime64("2000-01-01"),
+        ..    np.datetime64("2010-01-01"),
+        ..    np.datetime64("2010-01-01")
+        ..  ])
+        >>> s.describe()
+        count                       3
+        unique                      2
+        top       2010-01-01 00:00:00
+        freq                        2
+        first     2000-01-01 00:00:00
+        last      2010-01-01 00:00:00
+        dtype: object
+
+        Describing a `DataFrame`. By default only numeric fields are returned.
+
+        >>> df = pd.DataFrame(
+        ..    [[1, 'a'], [2, 'b'], [3, 'c']],
+        ..    columns=['numeric', 'object']
+        ..  )
+        >>> df.describe()
+               numeric
+        count      3.0
+        mean       2.0
+        std        1.0
+        min        1.0
+        25%        1.5
+        50%        2.0
+        75%        2.5
+        max        3.0
+
+        Describing all columns of a `DataFrame` regardless of data type.
+
+        >>> df.describe(include='all')
+                numeric object
+        count       3.0      3
+        unique      NaN      3
+        top         NaN      b
+        freq        NaN      1
+        mean        2.0    NaN
+        std         1.0    NaN
+        min         1.0    NaN
+        25%         1.5    NaN
+        50%         2.0    NaN
+        75%         2.5    NaN
+        max         3.0    NaN
+
+        Describing a column from a `DataFrame` by accessing it as an attribute.
+
+        >>> df.numeric.describe()
+        count    3.0
+        mean     2.0
+        std      1.0
+        min      1.0
+        25%      1.5
+        50%      2.0
+        75%      2.5
+        max      3.0
+        Name: numeric, dtype: float64
+
+        Including only ``numeric`` columns in a `DataFrame` description.
+
+        >>> df.describe(include=[np.number])
+               numeric
+        count      3.0
+        mean       2.0
+        std        1.0
+        min        1.0
+        25%        1.5
+        50%        2.0
+        75%        2.5
+        max        3.0
+
+        Including only ``string`` columns in a `DataFrame` description.
+
+        >>> df.describe(include=[np.object])
+               object
+        count       3
+        unique      3
+        top         b
+        freq        1
+
+        Excluding ``numeric`` columns from a `DataFrame` description.
+
+        >>> df.describe(exclude=[np.number])
+               object
+        count       3
+        unique      3
+        top         b
+        freq        1
+
+        Excluding ``object`` columns from a `DataFrame` description.
+
+        >>> df.describe(exclude=[np.object])
+               numeric
+        count      3.0
+        mean       2.0
+        std        1.0
+        min        1.0
+        25%        1.5
+        50%        2.0
+        75%        2.5
+        max        3.0
 
         See Also
         --------
         DataFrame.select_dtypes
         """
-
-    @Appender(_shared_docs['describe'] % _shared_doc_kwargs)
-    def describe(self, percentiles=None, include=None, exclude=None):
         if self.ndim >= 3:
             msg = "describe is not implemented on Panel or PanelND objects."
             raise NotImplementedError(msg)

From a445d2af38b7babbc21294a1218dba7fbf31a211 Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Mon, 26 Dec 2016 20:45:29 -0800
Subject: [PATCH 2/9] PEP8 fix

---
 pandas/core/generic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index adcb73acff2fe..84d8f4acf53a7 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5204,7 +5204,8 @@ def abs(self):
     def describe(self, percentiles=None, include=None, exclude=None):
         """
         Generates descriptive statistics that summarize the central tendency,
-        dispersion and shape of a dataset's distribution, excluding ``NaN`` values.
+        dispersion and shape of a dataset's distribution, excluding
+        ``NaN`` values.
 
         Analyzes both ``numeric`` and ``object`` series, as well
         as `DataFrame` column sets of mixed data types.

From 55cf4ec31a19b0a5a6cde94fef8a2d6762e929fb Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Mon, 26 Dec 2016 21:09:16 -0800
Subject: [PATCH 3/9] Slight change for consistency

---
 pandas/core/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 84d8f4acf53a7..6000b17591b59 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5212,8 +5212,8 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         For ``numeric`` data, the result's index will include ``count``,
         ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
-        upper percentiles. By default the lower percentile is `25` and the
-        upper percentile is ``75``. The `50` percentile is typically the
+        upper percentiles. By default the lower percentile is ``25`` and the
+        upper percentile is ``75``. The ``50`` percentile is typically the
         same as the median.
 
         For ``object`` data (e.g. strings or timestamps), the result's index

From 0161a57c7f00c7b408c4ff12345b82fc5513b26d Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Tue, 27 Dec 2016 10:47:04 -0800
Subject: [PATCH 4/9] Removed a bunch of tick marks and moved the extended
 description down to the Notes section

---
 pandas/core/generic.py | 88 +++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 40 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 6000b17591b59..ad2de0ef48ea8 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5207,31 +5207,6 @@ def describe(self, percentiles=None, include=None, exclude=None):
         dispersion and shape of a dataset's distribution, excluding
         ``NaN`` values.
 
-        Analyzes both ``numeric`` and ``object`` series, as well
-        as `DataFrame` column sets of mixed data types.
-
-        For ``numeric`` data, the result's index will include ``count``,
-        ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
-        upper percentiles. By default the lower percentile is ``25`` and the
-        upper percentile is ``75``. The ``50`` percentile is typically the
-        same as the median.
-
-        For ``object`` data (e.g. strings or timestamps), the result's index
-        will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
-        is the most common value. The ``freq`` is the most common value's
-        frequency. Timestamps also include the ``first`` and ``last`` items.
-
-        If multiple ``object`` values have the highest count, then the
-        ``count`` and ``top`` results will be arbitrarily chosen from
-        among those with the highest count.
-
-        For mixed data types provided via a `DataFrame`, the result will
-        include a union of attributes of each type.
-
-        The `include` and `exclude` parameters can be used to limit
-        which columns in a `DataFrame` are analyzed for the output.
-        The parameters are ignored when analyzing a `Series`.
-
         Parameters
         ----------
         percentiles : list of numbers, optional
@@ -5243,33 +5218,61 @@ def describe(self, percentiles=None, include=None, exclude=None):
             A white list of data types to include in the result. Ignored
             for `Series`. Here are the options:
 
-            - None (default). The result will include all ``numeric`` columns.
+            - None (default). The result will include all numeric columns.
             - 'all'. All columns on the input will be included in the output.
             - A list of dtypes or strings. Limits the results to the
               provided data types.
               To limit the result to numeric types submit
               ``numpy.number``. To limit it instead to categorical
-              objects submit the data type ``object``. Strings
+              objects submit the object data type. Strings
               can also be used in the style of
-              `select_dtypes` (e.g. df.describe(include=['O']))
+              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
         exclude : None (default) or a list of dtypes or strings, optional,
             A black list of data types to omit from the result. Ignored
-            for `Series`. Here are the options:
+            for Series. Here are the options:
 
             - None (default). The result will exclude nothing.
             - A list of dtypes or strings. Excludes the provided data types
               from the result. To select numeric types submit
               ``numpy.number``. To select categorical objects submut the data
               type ``object``. Strings can also be used in the style of
-              `select_dtypes` (e.g. df.describe(include=['O']))
+              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
 
         Returns
         -------
         summary: NDFrame of summary statistics
 
+        Notes
+        -----
+
+        Analyzes both numeric and object series, as well
+        as DataFrame column sets of mixed data types.
+
+        For numeric data, the result's index will include ``count``,
+        ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
+        upper percentiles. By default the lower percentile is ``25`` and the
+        upper percentile is ``75``. The ``50`` percentile is typically the
+        same as the median.
+
+        For object data (e.g. strings or timestamps), the result's index
+        will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
+        is the most common value. The ``freq`` is the most common value's
+        frequency. Timestamps also include the ``first`` and ``last`` items.
+
+        If multiple object values have the highest count, then the
+        ``count`` and ``top`` results will be arbitrarily chosen from
+        among those with the highest count.
+
+        For mixed data types provided via a DataFrame, the result will
+        include a union of attributes of each type.
+
+        The `include` and `exclude` parameters can be used to limit
+        which columns in a DataFrame are analyzed for the output.
+        The parameters are ignored when analyzing a Series.
+
         Examples
         --------
-        Describing a numeric `Series`.
+        Describing a numeric Series.
 
         >>> import pandas as pd
         >>> s = pd.Series([1, 2, 3])
@@ -5283,7 +5286,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%      2.5
         max      3.0
 
-        Describing a categorical `Series`.
+        Describing a categorical Series.
 
         >>> s = pd.Series(['a', 'a', 'b', 'c'])
         >>> s.describe()
@@ -5293,7 +5296,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         freq      2
         dtype: object
 
-        Describing a timestamp `Series`.
+        Describing a timestamp Series.
 
         >>> import numpy as np
         >>> s = pd.Series([
@@ -5310,7 +5313,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         last      2010-01-01 00:00:00
         dtype: object
 
-        Describing a `DataFrame`. By default only numeric fields are returned.
+        Describing a DataFrame. By default only numeric fields are returned.
 
         >>> df = pd.DataFrame(
         ..    [[1, 'a'], [2, 'b'], [3, 'c']],
@@ -5327,7 +5330,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%        2.5
         max        3.0
 
-        Describing all columns of a `DataFrame` regardless of data type.
+        Describing all columns of a DataFrame regardless of data type.
 
         >>> df.describe(include='all')
                 numeric object
@@ -5343,7 +5346,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%         2.5    NaN
         max         3.0    NaN
 
-        Describing a column from a `DataFrame` by accessing it as an attribute.
+        Describing a column from a DataFrame by accessing it as an attribute.
 
         >>> df.numeric.describe()
         count    3.0
@@ -5356,7 +5359,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         max      3.0
         Name: numeric, dtype: float64
 
-        Including only ``numeric`` columns in a `DataFrame` description.
+        Including only numeric columns in a DataFrame description.
 
         >>> df.describe(include=[np.number])
                numeric
@@ -5369,7 +5372,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%        2.5
         max        3.0
 
-        Including only ``string`` columns in a `DataFrame` description.
+        Including only string columns in a DataFrame description.
 
         >>> df.describe(include=[np.object])
                object
@@ -5378,7 +5381,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         top         b
         freq        1
 
-        Excluding ``numeric`` columns from a `DataFrame` description.
+        Excluding numeric columns from a DataFrame description.
 
         >>> df.describe(exclude=[np.number])
                object
@@ -5387,7 +5390,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         top         b
         freq        1
 
-        Excluding ``object`` columns from a `DataFrame` description.
+        Excluding object columns from a DataFrame description.
 
         >>> df.describe(exclude=[np.object])
                numeric
@@ -5402,6 +5405,11 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         See Also
         --------
+        DataFrame.count
+        DataFrame.max
+        DataFrame.min
+        DataFrame.mean
+        DataFrame.std
         DataFrame.select_dtypes
         """
         if self.ndim >= 3:

From 38015cee6d681f0391926c3ede830f23bdfe1f55 Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Tue, 27 Dec 2016 10:49:32 -0800
Subject: [PATCH 5/9] Typo fix

---
 pandas/core/generic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ad2de0ef48ea8..71a54422c3fb5 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5224,7 +5224,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
               provided data types.
               To limit the result to numeric types submit
               ``numpy.number``. To limit it instead to categorical
-              objects submit the object data type. Strings
+              objects submit the ``numpy.object`` data type. Strings
               can also be used in the style of
               ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
         exclude : None (default) or a list of dtypes or strings, optional,
@@ -5234,8 +5234,8 @@ def describe(self, percentiles=None, include=None, exclude=None):
             - None (default). The result will exclude nothing.
             - A list of dtypes or strings. Excludes the provided data types
               from the result. To select numeric types submit
-              ``numpy.number``. To select categorical objects submut the data
-              type ``object``. Strings can also be used in the style of
+              ``numpy.number``. To select categorical objects submit the data
+              type ``numpy.object``. Strings can also be used in the style of
               ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
 
         Returns

From 86dd44a7afd2c5a389502eb77be31bdf7ce4ceb8 Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Wed, 28 Dec 2016 08:30:56 -0800
Subject: [PATCH 6/9] Further improvements to describe documentation

---
 pandas/core/generic.py | 62 ++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 71a54422c3fb5..ddf3490c5a295 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5207,51 +5207,53 @@ def describe(self, percentiles=None, include=None, exclude=None):
         dispersion and shape of a dataset's distribution, excluding
         ``NaN`` values.
 
+        Analyzes both numeric and object series, as well
+        as ``DataFrame`` column sets of mixed data types. The output
+        will vary depending on what is provided. Refer to the notes
+        below for more detail.
+
         Parameters
         ----------
-        percentiles : list of numbers, optional
+        percentiles : list-like of numbers, optional
             The percentiles to include in the output. All should
             fall between 0 and 1. The default is
             ``[.25, .5, .75]``, which returns the 25th, 50th, and
             75th percentiles.
-        include : None (default), 'all', or list of dtypes or strings, optional
+        include : 'all' , list-like of dtypes or strings, or None (default), optional
             A white list of data types to include in the result. Ignored
-            for `Series`. Here are the options:
+            for ``Series``. Here are the options:
 
-            - None (default). The result will include all numeric columns.
-            - 'all'. All columns on the input will be included in the output.
-            - A list of dtypes or strings. Limits the results to the
+            - 'all' : All columns of the input will be included in the output.
+            - A list-like of dtypes or strings : Limits the results to the
               provided data types.
               To limit the result to numeric types submit
               ``numpy.number``. To limit it instead to categorical
               objects submit the ``numpy.object`` data type. Strings
               can also be used in the style of
               ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
-        exclude : None (default) or a list of dtypes or strings, optional,
+            - None (default) : The result will include all numeric columns.
+        exclude : list-like of dtypes or strings, or None (default), optional,
             A black list of data types to omit from the result. Ignored
-            for Series. Here are the options:
+            for ``Series``. Here are the options:
 
-            - None (default). The result will exclude nothing.
-            - A list of dtypes or strings. Excludes the provided data types
+            - A list-like of dtypes or strings : Excludes the provided data types
               from the result. To select numeric types submit
               ``numpy.number``. To select categorical objects submit the data
               type ``numpy.object``. Strings can also be used in the style of
               ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
+            - None (default) : The result will exclude nothing.
 
         Returns
         -------
-        summary: NDFrame of summary statistics
+        summary : Series/DataFrame of summary statistics
 
         Notes
         -----
 
-        Analyzes both numeric and object series, as well
-        as DataFrame column sets of mixed data types.
-
         For numeric data, the result's index will include ``count``,
         ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
         upper percentiles. By default the lower percentile is ``25`` and the
-        upper percentile is ``75``. The ``50`` percentile is typically the
+        upper percentile is ``75``. The ``50`` percentile is the
         same as the median.
 
         For object data (e.g. strings or timestamps), the result's index
@@ -5263,16 +5265,18 @@ def describe(self, percentiles=None, include=None, exclude=None):
         ``count`` and ``top`` results will be arbitrarily chosen from
         among those with the highest count.
 
-        For mixed data types provided via a DataFrame, the result will
-        include a union of attributes of each type.
+        For mixed data types provided via a ``DataFrame``, the default is to
+        return only an analysis of numeric columns. If ``include='all'``
+        is provided as an option, the result will include a union of
+        attributes of each type.
 
         The `include` and `exclude` parameters can be used to limit
-        which columns in a DataFrame are analyzed for the output.
-        The parameters are ignored when analyzing a Series.
+        which columns in a ``DataFrame`` are analyzed for the output.
+        The parameters are ignored when analyzing a ``Series``.
 
         Examples
         --------
-        Describing a numeric Series.
+        Describing a numeric ``Series``.
 
         >>> import pandas as pd
         >>> s = pd.Series([1, 2, 3])
@@ -5286,7 +5290,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%      2.5
         max      3.0
 
-        Describing a categorical Series.
+        Describing a categorical ``Series``.
 
         >>> s = pd.Series(['a', 'a', 'b', 'c'])
         >>> s.describe()
@@ -5296,7 +5300,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         freq      2
         dtype: object
 
-        Describing a timestamp Series.
+        Describing a timestamp ``Series``.
 
         >>> import numpy as np
         >>> s = pd.Series([
@@ -5313,7 +5317,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         last      2010-01-01 00:00:00
         dtype: object
 
-        Describing a DataFrame. By default only numeric fields are returned.
+        Describing a ``DataFrame``. By default only numeric fields are returned.
 
         >>> df = pd.DataFrame(
         ..    [[1, 'a'], [2, 'b'], [3, 'c']],
@@ -5330,7 +5334,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%        2.5
         max        3.0
 
-        Describing all columns of a DataFrame regardless of data type.
+        Describing all columns of a ``DataFrame`` regardless of data type.
 
         >>> df.describe(include='all')
                 numeric object
@@ -5346,7 +5350,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%         2.5    NaN
         max         3.0    NaN
 
-        Describing a column from a DataFrame by accessing it as an attribute.
+        Describing a column from a ``DataFrame`` by accessing it as an attribute.
 
         >>> df.numeric.describe()
         count    3.0
@@ -5359,7 +5363,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         max      3.0
         Name: numeric, dtype: float64
 
-        Including only numeric columns in a DataFrame description.
+        Including only numeric columns in a ``DataFrame`` description.
 
         >>> df.describe(include=[np.number])
                numeric
@@ -5372,7 +5376,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%        2.5
         max        3.0
 
-        Including only string columns in a DataFrame description.
+        Including only string columns in a ``DataFrame`` description.
 
         >>> df.describe(include=[np.object])
                object
@@ -5381,7 +5385,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         top         b
         freq        1
 
-        Excluding numeric columns from a DataFrame description.
+        Excluding numeric columns from a ``DataFrame`` description.
 
         >>> df.describe(exclude=[np.number])
                object
@@ -5390,7 +5394,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         top         b
         freq        1
 
-        Excluding object columns from a DataFrame description.
+        Excluding object columns from a ``DataFrame`` description.
 
         >>> df.describe(exclude=[np.object])
                numeric

From 8880a89889e16ced7c0b6201f40981b7e8b51b68 Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Sat, 31 Dec 2016 10:58:02 -0800
Subject: [PATCH 7/9] PEP8 fixes

---
 pandas/core/generic.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ddf3490c5a295..935b24c11ae87 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5219,12 +5219,12 @@ def describe(self, percentiles=None, include=None, exclude=None):
             fall between 0 and 1. The default is
             ``[.25, .5, .75]``, which returns the 25th, 50th, and
             75th percentiles.
-        include : 'all' , list-like of dtypes or strings, or None (default), optional
+        include : 'all', list-like of dtypes or None (default), optional
             A white list of data types to include in the result. Ignored
             for ``Series``. Here are the options:
 
             - 'all' : All columns of the input will be included in the output.
-            - A list-like of dtypes or strings : Limits the results to the
+            - A list-like of dtypes : Limits the results to the
               provided data types.
               To limit the result to numeric types submit
               ``numpy.number``. To limit it instead to categorical
@@ -5232,11 +5232,11 @@ def describe(self, percentiles=None, include=None, exclude=None):
               can also be used in the style of
               ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
             - None (default) : The result will include all numeric columns.
-        exclude : list-like of dtypes or strings, or None (default), optional,
+        exclude : list-like of dtypes or None (default), optional
             A black list of data types to omit from the result. Ignored
             for ``Series``. Here are the options:
 
-            - A list-like of dtypes or strings : Excludes the provided data types
+            - A list-like of dtypes : Excludes the provided data types
               from the result. To select numeric types submit
               ``numpy.number``. To select categorical objects submit the data
               type ``numpy.object``. Strings can also be used in the style of
@@ -5317,7 +5317,8 @@ def describe(self, percentiles=None, include=None, exclude=None):
         last      2010-01-01 00:00:00
         dtype: object
 
-        Describing a ``DataFrame``. By default only numeric fields are returned.
+        Describing a ``DataFrame``. By default only numeric fields
+        are returned.
 
         >>> df = pd.DataFrame(
         ..    [[1, 'a'], [2, 'b'], [3, 'c']],
@@ -5350,7 +5351,8 @@ def describe(self, percentiles=None, include=None, exclude=None):
         75%         2.5    NaN
         max         3.0    NaN
 
-        Describing a column from a ``DataFrame`` by accessing it as an attribute.
+        Describing a column from a ``DataFrame`` by accessing it as
+        an attribute.
 
         >>> df.numeric.describe()
         count    3.0

From dff88bb123232bb3e4d25ae3ad16e0dc2cbd2be5 Mon Sep 17 00:00:00 2001
From: Ben Welsh <ben.welsh@gmail.com>
Date: Sat, 31 Dec 2016 17:55:44 -0800
Subject: [PATCH 8/9] Fixed whitespace

---
 pandas/core/generic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 935b24c11ae87..c313fc9069072 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5249,7 +5249,6 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         Notes
         -----
-
         For numeric data, the result's index will include ``count``,
         ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
         upper percentiles. By default the lower percentile is ``25`` and the

From d97df49f4bce2cce600b8b73207543cbff540f93 Mon Sep 17 00:00:00 2001
From: palewire <ben.welsh@gmail.com>
Date: Sat, 31 Dec 2016 18:22:05 -0800
Subject: [PATCH 9/9] A third dot

---
 pandas/core/generic.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 935b24c11ae87..674210ea1553d 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5304,10 +5304,10 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         >>> import numpy as np
         >>> s = pd.Series([
-        ..    np.datetime64("2000-01-01"),
-        ..    np.datetime64("2010-01-01"),
-        ..    np.datetime64("2010-01-01")
-        ..  ])
+        ...   np.datetime64("2000-01-01"),
+        ...   np.datetime64("2010-01-01"),
+        ...   np.datetime64("2010-01-01")
+        ... ])
         >>> s.describe()
         count                       3
         unique                      2
@@ -5321,9 +5321,9 @@ def describe(self, percentiles=None, include=None, exclude=None):
         are returned.
 
         >>> df = pd.DataFrame(
-        ..    [[1, 'a'], [2, 'b'], [3, 'c']],
-        ..    columns=['numeric', 'object']
-        ..  )
+        ...   [[1, 'a'], [2, 'b'], [3, 'c']],
+        ...   columns=['numeric', 'object']
+        ... )
         >>> df.describe()
                numeric
         count      3.0