From 4d7afe5b527acc66261497b0af5fcbfacbf00230 Mon Sep 17 00:00:00 2001
From: Paul Reidy <paul_reidy@outlook.com>
Date: Tue, 3 Oct 2017 23:41:48 +0100
Subject: [PATCH 1/2] DOC: Clarifying use of categorical data in describe
 docstring (#16722)

---
 pandas/core/generic.py | 98 +++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 40 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5dd770b2600a0..ae01f7a3e6acb 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6352,20 +6352,22 @@ def describe(self, percentiles=None, include=None, exclude=None):
             - A list-like of dtypes : Limits the results to the
               provided data types.
               To limit the result to numeric types submit
-              ``numpy.number``. To limit it instead to categorical
-              objects submit the ``numpy.object`` data type. Strings
+              ``numpy.number``. To limit it instead to object columns submit 
+              the ``numpy.object`` data type. Strings
               can also be used in the style of
-              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
+              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 
+              select pandas categorical columns, use ``'category'``
             - None (default) : The result will include all numeric columns.
         exclude : list-like of dtypes or None (default), optional,
             A black list of data types to omit from the result. Ignored
             for ``Series``. Here are the options:
 
             - A list-like of dtypes : Excludes the provided data types
-              from the result. To select numeric types submit
-              ``numpy.number``. To select categorical objects submit the data
+              from the result. To exclude numeric types submit
+              ``numpy.number``. To exclude object columns submit the data
               type ``numpy.object``. Strings can also be used in the style of
-              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``)
+              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 
+              exclude pandas categorical columns, use ``'category'``
             - None (default) : The result will exclude nothing.
 
         Returns
@@ -6390,9 +6392,11 @@ def describe(self, percentiles=None, include=None, exclude=None):
         among those with the highest count.
 
         For mixed data types provided via a ``DataFrame``, the default is to
-        return only an analysis of numeric columns. If ``include='all'``
-        is provided as an option, the result will include a union of
-        attributes of each type.
+        return only an analysis of numeric columns. If the dataframe consists
+        only of object and categorical data without any numeric columns, the 
+        default is to return an analysis of both the object and categorical 
+        columns. If ``include='all'`` is provided as an option, the result 
+        will include a union of attributes of each type.
 
         The `include` and `exclude` parameters can be used to limit
         which columns in a ``DataFrame`` are analyzed for the output.
@@ -6442,8 +6446,10 @@ def describe(self, percentiles=None, include=None, exclude=None):
         Describing a ``DataFrame``. By default only numeric fields
         are returned.
 
-        >>> df = pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c']],
-        ...                   columns=['numeric', 'object'])
+        >>> df = pd.DataFrame({ 'object': ['a', 'b', 'c'],
+        ...                     'numeric': [1, 2, 3],
+        ...                     'categorical': pd.Categorical(['d','e','f']) 
+        ...                   })
         >>> df.describe()
                numeric
         count      3.0
@@ -6457,19 +6463,19 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         Describing all columns of a ``DataFrame`` regardless of data type.
 
-        >>> df.describe(include='all')
-                numeric object
-        count       3.0      3
-        unique      NaN      3
-        top         NaN      b
-        freq        NaN      1
-        mean        2.0    NaN
-        std         1.0    NaN
-        min         1.0    NaN
-        25%         1.5    NaN
-        50%         2.0    NaN
-        75%         2.5    NaN
-        max         3.0    NaN
+        >>> df.describe(include='all')        
+                categorical  numeric object
+        count            3      3.0      3
+        unique           3      NaN      3
+        top              f      NaN      c
+        freq             1      NaN      1
+        mean           NaN      2.0    NaN
+        std            NaN      1.0    NaN
+        min            NaN      1.0    NaN
+        25%            NaN      1.5    NaN
+        50%            NaN      2.0    NaN
+        75%            NaN      2.5    NaN
+        max            NaN      3.0    NaN
 
         Describing a column from a ``DataFrame`` by accessing it as
         an attribute.
@@ -6504,31 +6510,43 @@ def describe(self, percentiles=None, include=None, exclude=None):
                object
         count       3
         unique      3
-        top         b
+        top         c
         freq        1
 
+        Including only categorical columns from a ``DataFrame`` description.
+        
+        >>> df.describe(include=['category'])
+               categorical
+        count            3
+        unique           3
+        top              f
+        freq             1
+
         Excluding numeric columns from a ``DataFrame`` description.
 
         >>> df.describe(exclude=[np.number])
-               object
-        count       3
-        unique      3
-        top         b
-        freq        1
+               categorical object
+        count            3      3
+        unique           3      3
+        top              f      c
+        freq             1      1
 
         Excluding object columns from a ``DataFrame`` description.
 
         >>> df.describe(exclude=[np.object])
-               numeric
-        count      3.0
-        mean       2.0
-        std        1.0
-        min        1.0
-        25%        1.5
-        50%        2.0
-        75%        2.5
-        max        3.0
-
+                categorical  numeric
+        count            3      3.0
+        unique           3      NaN
+        top              f      NaN
+        freq             1      NaN
+        mean           NaN      2.0
+        std            NaN      1.0
+        min            NaN      1.0
+        25%            NaN      1.5
+        50%            NaN      2.0
+        75%            NaN      2.5
+        max            NaN      3.0
+    
         See Also
         --------
         DataFrame.count

From b300fb57bd4bec794d7a5086de14c01e90b16105 Mon Sep 17 00:00:00 2001
From: Paul Reidy <paul_reidy@outlook.com>
Date: Wed, 4 Oct 2017 21:50:22 +0100
Subject: [PATCH 2/2] Fix pep8 issues

---
 pandas/core/generic.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ae01f7a3e6acb..ed2a592a64efe 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6352,10 +6352,10 @@ def describe(self, percentiles=None, include=None, exclude=None):
             - A list-like of dtypes : Limits the results to the
               provided data types.
               To limit the result to numeric types submit
-              ``numpy.number``. To limit it instead to object columns submit 
+              ``numpy.number``. To limit it instead to object columns submit
               the ``numpy.object`` data type. Strings
               can also be used in the style of
-              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 
+              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
               select pandas categorical columns, use ``'category'``
             - None (default) : The result will include all numeric columns.
         exclude : list-like of dtypes or None (default), optional,
@@ -6366,7 +6366,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
               from the result. To exclude numeric types submit
               ``numpy.number``. To exclude object columns submit the data
               type ``numpy.object``. Strings can also be used in the style of
-              ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 
+              ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
               exclude pandas categorical columns, use ``'category'``
             - None (default) : The result will exclude nothing.
 
@@ -6393,9 +6393,9 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         For mixed data types provided via a ``DataFrame``, the default is to
         return only an analysis of numeric columns. If the dataframe consists
-        only of object and categorical data without any numeric columns, the 
-        default is to return an analysis of both the object and categorical 
-        columns. If ``include='all'`` is provided as an option, the result 
+        only of object and categorical data without any numeric columns, the
+        default is to return an analysis of both the object and categorical
+        columns. If ``include='all'`` is provided as an option, the result
         will include a union of attributes of each type.
 
         The `include` and `exclude` parameters can be used to limit
@@ -6448,7 +6448,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         >>> df = pd.DataFrame({ 'object': ['a', 'b', 'c'],
         ...                     'numeric': [1, 2, 3],
-        ...                     'categorical': pd.Categorical(['d','e','f']) 
+        ...                     'categorical': pd.Categorical(['d', 'e', 'f'])
         ...                   })
         >>> df.describe()
                numeric
@@ -6463,7 +6463,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
 
         Describing all columns of a ``DataFrame`` regardless of data type.
 
-        >>> df.describe(include='all')        
+        >>> df.describe(include='all')
                 categorical  numeric object
         count            3      3.0      3
         unique           3      NaN      3
@@ -6514,7 +6514,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         freq        1
 
         Including only categorical columns from a ``DataFrame`` description.
-        
+
         >>> df.describe(include=['category'])
                categorical
         count            3
@@ -6546,7 +6546,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
         50%            NaN      2.0
         75%            NaN      2.5
         max            NaN      3.0
-    
+
         See Also
         --------
         DataFrame.count