From e3d1a79a76b4a29663b5cdb8b0701a878722e6e1 Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 00:30:23 -0500
Subject: [PATCH 1/9] DOC GH22893 Fix docstring of groupby in
 pandas/core/generic.py

---
 pandas/core/generic.py | 63 ++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 393e7caae5fab..c56c55c213acd 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7063,8 +7063,10 @@ def clip_lower(self, threshold, axis=None, inplace=False):
     def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
                 group_keys=True, squeeze=False, observed=False, **kwargs):
         """
-        Group series using mapper (dict or key function, apply given function
-        to group, return result as series) or by a series of columns.
+        Group series using a mapper or by a series of columns.
+
+        The mapper is a dict or key function that applies the given function
+        to group and return result as series.
 
         Parameters
         ----------
@@ -7078,26 +7080,29 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             labels may be passed to group by the columns in ``self``. Notice
             that a tuple is interpreted a (single) key.
         axis : int, default 0
+            If 0, split by rows. If 1, split by columns.
         level : int, level name, or sequence of such, default None
             If the axis is a MultiIndex (hierarchical), group by a particular
-            level or levels
+            level or levels.
         as_index : boolean, default True
             For aggregated output, return object with group labels as the
             index. Only relevant for DataFrame input. as_index=False is
-            effectively "SQL-style" grouped output
+            effectively "SQL-style" grouped output.
         sort : boolean, default True
             Sort group keys. Get better performance by turning this off.
             Note this does not influence the order of observations within each
             group.  groupby preserves the order of rows within each group.
         group_keys : boolean, default True
-            When calling apply, add group keys to index to identify pieces
+            When calling apply, add group keys to index to identify pieces.
         squeeze : boolean, default False
-            reduce the dimensionality of the return type if possible,
-            otherwise return a consistent type
+            Reduce the dimensionality of the return type if possible,
+            otherwise return a consistent type.
         observed : boolean, default False
             This only applies if any of the groupers are Categoricals
             If True: only show observed values for categorical groupers.
             If False: show all values for categorical groupers.
+        **kwargs
+            Only accepts argument 'mutated'.
 
             .. versionadded:: 0.23.0
 
@@ -7107,14 +7112,42 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
 
         Examples
         --------
-        DataFrame results
-
-        >>> data.groupby(func, axis=0).mean()
-        >>> data.groupby(['col1', 'col2'])['col3'].mean()
-
-        DataFrame with hierarchical index
-
-        >>> data.groupby(['col1', 'col2']).mean()
+        >>> df = pd.DataFrame({'col1' : ['A', 'A', 'B', 'B'],
+        ...                    'col2' : [1, 2, 3, 4]})
+        >>> df
+          col1  col2
+        0    A     1
+        1    A     2
+        2    B     3
+        3    B     4
+        >>> df.groupby(['col1']).mean()
+              col2
+        col1
+        A      1.5
+        B      3.5
+
+        **Hierarchical indexes**
+
+        We can groupby different levels of a hierarchical index
+        using the `level` parameter:
+
+        >>> arrays = [np.array(['A', 'A', 'B', 'B']),
+        ...           np.array(['foo', 'bar', 'foo', 'bar'])]
+        >>> df = pd.DataFrame(np.array([1, 2, 3, 4]), index=arrays)
+        >>> df
+               0
+        A foo  1
+          bar  2
+        B foo  3
+          bar  4
+        >>> df.groupby(level=0).mean()
+             0
+        A  1.5
+        B  3.5
+        >>> df.groupby(level=1).mean()
+             0
+        bar  3
+        foo  2
 
         Notes
         -----

From 62c9c3ad6024c80f297d12639fc00b3db5e97faa Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 00:48:14 -0500
Subject: [PATCH 2/9] DOC GH22893 Fix docstring of groupby in
 pandas/core/generic.py

---
 pandas/core/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c56c55c213acd..8308c5e94c85b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7066,7 +7066,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         Group series using a mapper or by a series of columns.
 
         The mapper is a dict or key function that applies the given function
-        to group and return result as series.
+        on the selected axis and returns the result as a series.
 
         Parameters
         ----------
@@ -7080,7 +7080,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             labels may be passed to group by the columns in ``self``. Notice
             that a tuple is interpreted a (single) key.
         axis : int, default 0
-            If 0, split by rows. If 1, split by columns.
+            If 0, group by rows. If 1, group by columns.
         level : int, level name, or sequence of such, default None
             If the axis is a MultiIndex (hierarchical), group by a particular
             level or levels.

From a02652fc4c3fd54991ce161c34be8e9e6abfbcf5 Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 02:15:38 -0500
Subject: [PATCH 3/9] Minor fixes

---
 pandas/core/generic.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8308c5e94c85b..d4e044fcee753 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7091,18 +7091,19 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         sort : boolean, default True
             Sort group keys. Get better performance by turning this off.
             Note this does not influence the order of observations within each
-            group.  groupby preserves the order of rows within each group.
+            group. Groupby preserves the order of rows within each group.
         group_keys : boolean, default True
             When calling apply, add group keys to index to identify pieces.
         squeeze : boolean, default False
             Reduce the dimensionality of the return type if possible,
             otherwise return a consistent type.
         observed : boolean, default False
-            This only applies if any of the groupers are Categoricals
+            This only applies if any of the groupers are Categoricals.
             If True: only show observed values for categorical groupers.
             If False: show all values for categorical groupers.
         **kwargs
-            Only accepts argument 'mutated'.
+            Optional, only accepts keyword argument 'mutated'
+            and is passed to groupby.
 
             .. versionadded:: 0.23.0
 

From 3b09a713ada839e23362e234694b6cac974235ba Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 04:19:00 -0500
Subject: [PATCH 4/9] Minor fixes and updated description

---
 pandas/core/generic.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d4e044fcee753..53a96ee56095a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7065,8 +7065,10 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         """
         Group series using a mapper or by a series of columns.
 
-        The mapper is a dict or key function that applies the given function
-        on the selected axis and returns the result as a series.
+        Any groupby operation involves some combination of splitting the
+        object, applying a function, and combining the results. This can be
+        used to group large amounts of data and compute operations on these
+        groups.
 
         Parameters
         ----------
@@ -7079,8 +7081,8 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             values are used as-is determine the groups. A label or list of
             labels may be passed to group by the columns in ``self``. Notice
             that a tuple is interpreted a (single) key.
-        axis : int, default 0
-            If 0, group by rows. If 1, group by columns.
+        axis : {0 or 'index', 1 or 'columns', None}
+            Split along rows (0) or columns (1).
         level : int, level name, or sequence of such, default None
             If the axis is a MultiIndex (hierarchical), group by a particular
             level or levels.
@@ -7101,12 +7103,13 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             This only applies if any of the groupers are Categoricals.
             If True: only show observed values for categorical groupers.
             If False: show all values for categorical groupers.
+
+            .. versionadded:: 0.23.0
+
         **kwargs
             Optional, only accepts keyword argument 'mutated'
             and is passed to groupby.
 
-            .. versionadded:: 0.23.0
-
         Returns
         -------
         GroupBy object

From 1579ba18ad81411acc554e52d03effabc2d937d1 Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 04:47:44 -0500
Subject: [PATCH 5/9] Minor fixes, updated return description

---
 pandas/core/generic.py | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 53a96ee56095a..6e4ea9e4377e6 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7065,7 +7065,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         """
         Group series using a mapper or by a series of columns.
 
-        Any groupby operation involves some combination of splitting the
+        A groupby operation involves some combination of splitting the
         object, applying a function, and combining the results. This can be
         used to group large amounts of data and compute operations on these
         groups.
@@ -7081,25 +7081,25 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             values are used as-is determine the groups. A label or list of
             labels may be passed to group by the columns in ``self``. Notice
             that a tuple is interpreted a (single) key.
-        axis : {0 or 'index', 1 or 'columns', None}
+        axis : {0 or 'index', 1 or 'columns'}
             Split along rows (0) or columns (1).
         level : int, level name, or sequence of such, default None
             If the axis is a MultiIndex (hierarchical), group by a particular
             level or levels.
-        as_index : boolean, default True
+        as_index : bool, default True
             For aggregated output, return object with group labels as the
             index. Only relevant for DataFrame input. as_index=False is
             effectively "SQL-style" grouped output.
-        sort : boolean, default True
+        sort : bool, default True
             Sort group keys. Get better performance by turning this off.
             Note this does not influence the order of observations within each
             group. Groupby preserves the order of rows within each group.
-        group_keys : boolean, default True
+        group_keys : bool, default True
             When calling apply, add group keys to index to identify pieces.
-        squeeze : boolean, default False
+        squeeze : bool, default False
             Reduce the dimensionality of the return type if possible,
             otherwise return a consistent type.
-        observed : boolean, default False
+        observed : bool, default False
             This only applies if any of the groupers are Categoricals.
             If True: only show observed values for categorical groupers.
             If False: show all values for categorical groupers.
@@ -7107,12 +7107,18 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             .. versionadded:: 0.23.0
 
         **kwargs
-            Optional, only accepts keyword argument 'mutated'
-            and is passed to groupby.
+            Optional, only accepts keyword argument 'mutated' and is passed
+            to groupby.
 
         Returns
         -------
-        GroupBy object
+        DataFrameGroupBy object
+            An object that contains information about the groups.
+
+        See Also
+        --------
+        resample : Convenience method for frequency conversion and resampling
+            of time series.
 
         Examples
         --------
@@ -7130,7 +7136,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         A      1.5
         B      3.5
 
-        **Hierarchical indexes**
+        **Hierarchical Indexes**
 
         We can groupby different levels of a hierarchical index
         using the `level` parameter:
@@ -7157,11 +7163,6 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         -----
         See the `user guide
         <http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
-
-        See also
-        --------
-        resample : Convenience method for frequency conversion and resampling
-            of time series.
         """
         from pandas.core.groupby.groupby import groupby
 

From bc75b8abbcdcbe645e9e41f904768c79be489c8f Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 16:33:29 -0500
Subject: [PATCH 6/9] Various fixes, meaningful examples added

---
 pandas/core/generic.py | 78 +++++++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 36 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 6e4ea9e4377e6..32fdb6a04aeff 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7063,7 +7063,7 @@ def clip_lower(self, threshold, axis=None, inplace=False):
     def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
                 group_keys=True, squeeze=False, observed=False, **kwargs):
         """
-        Group series using a mapper or by a series of columns.
+        Group dataframe or series using a mapper or by a series of columns.
 
         A groupby operation involves some combination of splitting the
         object, applying a function, and combining the results. This can be
@@ -7081,7 +7081,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
             values are used as-is determine the groups. A label or list of
             labels may be passed to group by the columns in ``self``. Notice
             that a tuple is interpreted a (single) key.
-        axis : {0 or 'index', 1 or 'columns'}
+        axis : {0 or 'index', 1 or 'columns'}, default 0
             Split along rows (0) or columns (1).
         level : int, level name, or sequence of such, default None
             If the axis is a MultiIndex (hierarchical), group by a particular
@@ -7112,57 +7112,63 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
 
         Returns
         -------
-        DataFrameGroupBy object
-            An object that contains information about the groups.
+        DataFrameGroupBy or SeriesGroupBy
+            Depends on the calling object and returns groupby object that 
+            contains information about the groups.
 
         See Also
         --------
         resample : Convenience method for frequency conversion and resampling
             of time series.
 
+        Notes
+        -----
+        See the `user guide
+        <http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
+
         Examples
         --------
-        >>> df = pd.DataFrame({'col1' : ['A', 'A', 'B', 'B'],
-        ...                    'col2' : [1, 2, 3, 4]})
+        >>> df = pd.DataFrame({'Student' : ['Bob', 'Bob', 'Mary', 'Mary'],
+        ...                    'Grade' : [100, 92, 82, 85]})
         >>> df
-          col1  col2
-        0    A     1
-        1    A     2
-        2    B     3
-        3    B     4
-        >>> df.groupby(['col1']).mean()
-              col2
-        col1
-        A      1.5
-        B      3.5
+          Student  Grade
+        0     Bob    100
+        1     Bob     92
+        2    Mary     82
+        3    Mary     85
+        >>> df.groupby(['Student']).mean()
+                 Grade
+        Student       
+        Bob       96.0
+        Mary      83.5
 
         **Hierarchical Indexes**
 
         We can groupby different levels of a hierarchical index
         using the `level` parameter:
 
-        >>> arrays = [np.array(['A', 'A', 'B', 'B']),
-        ...           np.array(['foo', 'bar', 'foo', 'bar'])]
-        >>> df = pd.DataFrame(np.array([1, 2, 3, 4]), index=arrays)
+        >>> arrays = [['TX', 'TX', 'NY', 'NY'],
+        ...           ['Urban', 'Rural', 'Urban', 'Rural']]
+        >>> index = pd.MultiIndex.from_arrays(arrays, names=('State', 'Type'))
+        >>> df = pd.DataFrame({'Pop %' : [84.7, 15.3, 87.9, 12.1]},
+        ...                    index=index)
         >>> df
-               0
-        A foo  1
-          bar  2
-        B foo  3
-          bar  4
-        >>> df.groupby(level=0).mean()
-             0
-        A  1.5
-        B  3.5
+                     Pop %
+        State Type        
+        TX    Urban   84.7
+              Rural   15.3
+        NY    Urban   87.9
+              Rural   12.1
+        >>> df.groupby(level=0).sum()
+               Pop %
+        State       
+        NY      100.0
+        TX      100.0
         >>> df.groupby(level=1).mean()
-             0
-        bar  3
-        foo  2
-
-        Notes
-        -----
-        See the `user guide
-        <http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.
+               Pop %
+        Type        
+        Rural   13.7
+        Urban   86.3
         """
         from pandas.core.groupby.groupby import groupby
 

From d44a867d02d9b74c164e3556974ec93af02474c8 Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 16:36:00 -0500
Subject: [PATCH 7/9] Removed trailing whitespaces

---
 pandas/core/generic.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 32fdb6a04aeff..15eb2d6dc839e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7113,7 +7113,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         Returns
         -------
         DataFrameGroupBy or SeriesGroupBy
-            Depends on the calling object and returns groupby object that 
+            Depends on the calling object and returns groupby object that
             contains information about the groups.
 
         See Also
@@ -7138,7 +7138,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         3    Mary     85
         >>> df.groupby(['Student']).mean()
                  Grade
-        Student       
+        Student
         Bob       96.0
         Mary      83.5
 
@@ -7154,19 +7154,19 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
         ...                    index=index)
         >>> df
                      Pop %
-        State Type        
+        State Type
         TX    Urban   84.7
               Rural   15.3
         NY    Urban   87.9
               Rural   12.1
         >>> df.groupby(level=0).sum()
                Pop %
-        State       
+        State
         NY      100.0
         TX      100.0
         >>> df.groupby(level=1).mean()
                Pop %
-        Type        
+        Type
         Rural   13.7
         Urban   86.3
         """

From 3f748f07005a6e0cdd03b99eb4a8e7f11f5f7a6f Mon Sep 17 00:00:00 2001
From: Tony Tao <tonytao@utexas.edu>
Date: Mon, 1 Oct 2018 17:14:22 -0500
Subject: [PATCH 8/9] Redid examples to fit convention

---
 pandas/core/generic.py | 61 +++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 15eb2d6dc839e..5edc3142f1726 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7128,47 +7128,48 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
 
         Examples
         --------
-        >>> df = pd.DataFrame({'Student' : ['Bob', 'Bob', 'Mary', 'Mary'],
-        ...                    'Grade' : [100, 92, 82, 85]})
+        >>> df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon',
+        ...                                'Parrot', 'Parrot'],
+        ...                    'Max Speed' : [380., 370., 24., 26.]})
         >>> df
-          Student  Grade
-        0     Bob    100
-        1     Bob     92
-        2    Mary     82
-        3    Mary     85
-        >>> df.groupby(['Student']).mean()
-                 Grade
-        Student
-        Bob       96.0
-        Mary      83.5
+           Animal  Max Speed
+        0  Falcon      380.0
+        1  Falcon      370.0
+        2  Parrot       24.0
+        3  Parrot       26.0
+        >>> df.groupby(['Animal']).mean()
+                Max Speed
+        Animal
+        Falcon      375.0
+        Parrot       25.0
 
         **Hierarchical Indexes**
 
         We can groupby different levels of a hierarchical index
         using the `level` parameter:
 
-        >>> arrays = [['TX', 'TX', 'NY', 'NY'],
-        ...           ['Urban', 'Rural', 'Urban', 'Rural']]
-        >>> index = pd.MultiIndex.from_arrays(arrays, names=('State', 'Type'))
-        >>> df = pd.DataFrame({'Pop %' : [84.7, 15.3, 87.9, 12.1]},
+        >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+        ...           ['Captive', 'Wild', 'Captive', 'Wild']]
+        >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
+        >>> df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]},
         ...                    index=index)
         >>> df
-                     Pop %
-        State Type
-        TX    Urban   84.7
-              Rural   15.3
-        NY    Urban   87.9
-              Rural   12.1
-        >>> df.groupby(level=0).sum()
-               Pop %
-        State
-        NY      100.0
-        TX      100.0
+                        Max Speed
+        Animal Type
+        Falcon Captive      390.0
+               Wild         350.0
+        Parrot Captive       30.0
+               Wild          20.0
+        >>> df.groupby(level=0).mean()
+                Max Speed
+        Animal
+        Falcon      370.0
+        Parrot       25.0
         >>> df.groupby(level=1).mean()
-               Pop %
+                 Max Speed
         Type
-        Rural   13.7
-        Urban   86.3
+        Captive      210.0
+        Wild         185.0
         """
         from pandas.core.groupby.groupby import groupby
 

From 1a7237d25bf7d14fbbe879fc27608a6eeaad3cd1 Mon Sep 17 00:00:00 2001
From: William Ayd <william.ayd@icloud.com>
Date: Tue, 2 Oct 2018 17:00:41 -0700
Subject: [PATCH 9/9] Update generic.py

---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5edc3142f1726..5b4ce5a382324 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7063,7 +7063,7 @@ def clip_lower(self, threshold, axis=None, inplace=False):
     def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
                 group_keys=True, squeeze=False, observed=False, **kwargs):
         """
-        Group dataframe or series using a mapper or by a series of columns.
+        Group DataFrame or Series using a mapper or by a Series of columns.
 
         A groupby operation involves some combination of splitting the
         object, applying a function, and combining the results. This can be