From ccd751805d90ad8182b723e4ea3427721ef4b77b Mon Sep 17 00:00:00 2001
From: ed_abati <edoardo.abati@hotmail.com>
Date: Sun, 21 Oct 2018 15:50:21 +0100
Subject: [PATCH 1/4] DOC: Updated the docstring of Series.rank /
 DataFrame.rank

---
 pandas/core/generic.py | 107 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 90 insertions(+), 17 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 31b700abcfdb3..d58b3bb378362 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7715,34 +7715,107 @@ def last(self, offset):
     def rank(self, axis=0, method='average', numeric_only=None,
              na_option='keep', ascending=True, pct=False):
         """
-        Compute numerical data ranks (1 through n) along axis. Equal values are
-        assigned a rank that is the average of the ranks of those values
+        Compute numerical data ranks (1 through n) along axis.
+
+        By default, equal values are assigned a rank that is the average of the
+        ranks of those values.
 
         Parameters
         ----------
         axis : {0 or 'index', 1 or 'columns'}, default 0
-            index to direct ranking
-        method : {'average', 'min', 'max', 'first', 'dense'}
-            * average: average rank of group
-            * min: lowest rank in group
-            * max: highest rank in group
-            * first: ranks assigned in order they appear in the array
-            * dense: like 'min', but rank always increases by 1 between groups
+            Index to direct ranking.
+        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+            Which method to use to rank equal values:
+            * average: average rank of group.
+            * min: lowest rank in group.
+            * max: highest rank in group.
+            * first: ranks assigned in order they appear in the array.
+            * dense: like 'min', but rank always increases by 1 between groups.
         numeric_only : boolean, default None
             Include only float, int, boolean data. Valid only for DataFrame or
-            Panel objects
-        na_option : {'keep', 'top', 'bottom'}
-            * keep: leave NA values where they are
-            * top: smallest rank if ascending
-            * bottom: smallest rank if descending
+            Panel objects.
+        na_option : {'keep', 'top', 'bottom'}, default 'keep'
+            How to rank NaN values:
+            * keep: assign NaN rank to NaN values.
+            * top: assign smallest rank to NaN values if ascending.
+            * bottom: assign highest rank to NaN values if ascending.
         ascending : boolean, default True
-            False for ranks by high (1) to low (N)
+            False for ranks by high (1) to low (N).
         pct : boolean, default False
-            Computes percentage rank of data
+            Computes percentage rank of data.
 
         Returns
         -------
-        ranks : same type as caller
+        ranks : Series or DataFrame
+
+        Examples
+        --------
+
+        The default behaviour returns average ranks of every columns
+
+        >>> df = pd.DataFrame(data={'Customer':['A','B','C','D','E'],
+        ...                         'Tot_Spend':[12,20,20,18,16]})
+        >>> df.rank()
+           Customer  Tot_Spend
+        0       1.0        1.0
+        1       2.0        4.5
+        2       3.0        4.5
+        3       4.0        3.0
+        4       5.0        2.0
+
+        The argument numeric_only will only return rank for float, int and
+        boolean data
+
+        >>> df.rank(numeric_only=True)
+           Tot_Spend
+        0        1.0
+        1        4.5
+        2        4.5
+        3        3.0
+        4        2.0
+
+        The following examples show how rank behaves with every different
+        method and setting pct = True
+
+        >>> df['default_rank'] = df['Tot_Spend'].rank()
+        >>> df['min_rank'] = df['Tot_Spend'].rank(method='min')
+        >>> df['max_rank'] = df['Tot_Spend'].rank(method='max')
+        >>> df['dense_rank'] = df['Tot_Spend'].rank(method='dense')
+        >>> df[['Tot_Spend','default_rank','min_rank','max_rank','dense_rank']]
+           Tot_Spend  default_rank  min_rank  max_rank  dense_rank
+        0         12           1.0       1.0       1.0         1.0
+        1         20           4.5       4.0       5.0         4.0
+        2         20           4.5       4.0       5.0         4.0
+        3         18           3.0       3.0       3.0         3.0
+        4         16           2.0       2.0       2.0         2.0
+        >>> df['default_rank'] = df['Tot_Spend'].rank()
+        >>> df['pct_rank'] = df['Tot_Spend'].rank(pct=True)
+        >>> df[['Tot_Spend','default_rank','pct_rank']]
+           Tot_Spend  default_rank  pct_rank
+        0         12           1.0       0.2
+        1         20           4.5       0.9
+        2         20           4.5       0.9
+        3         18           3.0       0.6
+        4         16           2.0       0.4
+
+        The following example shows how rank behaves with NAs
+
+        >>> df = pd.DataFrame(data={'Student':['A','B','C','D','E'],
+        ...                         'Score':[78, np.nan, 68, 90, 68]})
+        >>> df['NA_keep'] = df['Score'].rank()
+        >>> df['NA_min'] = df['Score'].rank(na_option='bottom')
+        >>> df['NA_max'] = df['Score'].rank(na_option='top')
+        >>> df
+          Student  Score  NA_keep  NA_min  NA_max
+        0       A   78.0      3.0     3.0     4.0
+        1       B    NaN      NaN     5.0     1.0
+        2       C   68.0      1.5     1.5     2.5
+        3       D   90.0      4.0     4.0     5.0
+        4       E   68.0      1.5     1.5     2.5
+
+        See also
+        --------
+        GroupBy.rank : Rank of values within each group.
         """
         axis = self._get_axis_number(axis)
 

From 363ccc0ba9edb667f403f940412cc26d29e4d881 Mon Sep 17 00:00:00 2001
From: ed_abati <edoardo.abati@hotmail.com>
Date: Sun, 21 Oct 2018 17:59:11 +0100
Subject: [PATCH 2/4] DOC: updated the rank docstring with proposed changes

---
 pandas/core/generic.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d58b3bb378362..c06ce35cbdc0a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7731,7 +7731,7 @@ def rank(self, axis=0, method='average', numeric_only=None,
             * max: highest rank in group.
             * first: ranks assigned in order they appear in the array.
             * dense: like 'min', but rank always increases by 1 between groups.
-        numeric_only : boolean, default None
+        numeric_only : bool, default None
             Include only float, int, boolean data. Valid only for DataFrame or
             Panel objects.
         na_option : {'keep', 'top', 'bottom'}, default 'keep'
@@ -7739,9 +7739,9 @@ def rank(self, axis=0, method='average', numeric_only=None,
             * keep: assign NaN rank to NaN values.
             * top: assign smallest rank to NaN values if ascending.
             * bottom: assign highest rank to NaN values if ascending.
-        ascending : boolean, default True
+        ascending : bool, default True
             False for ranks by high (1) to low (N).
-        pct : boolean, default False
+        pct : bool, default False
             Computes percentage rank of data.
 
         Returns
@@ -7751,7 +7751,7 @@ def rank(self, axis=0, method='average', numeric_only=None,
         Examples
         --------
 
-        The default behaviour returns average ranks of every columns
+        The default behavior returns average ranks of every columns
 
         >>> df = pd.DataFrame(data={'Customer':['A','B','C','D','E'],
         ...                         'Tot_Spend':[12,20,20,18,16]})

From 878c82999d58610ce0ff9d17a5d194de0917b76f Mon Sep 17 00:00:00 2001
From: ed_abati <edoardo.abati@hotmail.com>
Date: Sat, 27 Oct 2018 19:29:23 +0100
Subject: [PATCH 3/4] fixed punctuation, pep8 in examples, see also, removed
 numeric_only example

---
 pandas/core/generic.py | 65 ++++++++++++++++++------------------------
 1 file changed, 28 insertions(+), 37 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c06ce35cbdc0a..68fbdbeb17af7 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7725,20 +7725,20 @@ def rank(self, axis=0, method='average', numeric_only=None,
         axis : {0 or 'index', 1 or 'columns'}, default 0
             Index to direct ranking.
         method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
-            Which method to use to rank equal values:
-            * average: average rank of group.
-            * min: lowest rank in group.
-            * max: highest rank in group.
-            * first: ranks assigned in order they appear in the array.
-            * dense: like 'min', but rank always increases by 1 between groups.
-        numeric_only : bool, default None
-            Include only float, int, boolean data. Valid only for DataFrame or
-            Panel objects.
+            * average: average rank of group
+            * min: lowest rank in group
+            * max: highest rank in group
+            * first: ranks assigned in order they appear in the array
+            * dense: like 'min', but rank always increases by 1 between groups
+        numeric_only : bool, optional
+            If True, rank only float, int and boolean data.
+            Valid only for DataFrame or Panel objects.
         na_option : {'keep', 'top', 'bottom'}, default 'keep'
             How to rank NaN values:
-            * keep: assign NaN rank to NaN values.
-            * top: assign smallest rank to NaN values if ascending.
-            * bottom: assign highest rank to NaN values if ascending.
+
+            * keep: assign NaN rank to NaN values
+            * top: assign smallest rank to NaN values if ascending
+            * bottom: assign highest rank to NaN values if ascending
         ascending : bool, default True
             False for ranks by high (1) to low (N).
         pct : bool, default False
@@ -7746,15 +7746,19 @@ def rank(self, axis=0, method='average', numeric_only=None,
 
         Returns
         -------
-        ranks : Series or DataFrame
+        ranks : same type as caller
+            Return a Series or DataFrame with data ranks as values.
 
-        Examples
+        See Also
         --------
+        pandas.core.groupby.GroupBy.rank : Rank of values within each group.
 
-        The default behavior returns average ranks of every columns
+        Examples
+        --------
+        The default behavior assigns average ranks within each column:
 
-        >>> df = pd.DataFrame(data={'Customer':['A','B','C','D','E'],
-        ...                         'Tot_Spend':[12,20,20,18,16]})
+        >>> df = pd.DataFrame(data={'Customer': [1, 2, 3, 4, 5],
+        ...                         'Tot_Spend': [12, 20, 20, 18, 16]})
         >>> df.rank()
            Customer  Tot_Spend
         0       1.0        1.0
@@ -7763,34 +7767,25 @@ def rank(self, axis=0, method='average', numeric_only=None,
         3       4.0        3.0
         4       5.0        2.0
 
-        The argument numeric_only will only return rank for float, int and
-        boolean data
-
-        >>> df.rank(numeric_only=True)
-           Tot_Spend
-        0        1.0
-        1        4.5
-        2        4.5
-        3        3.0
-        4        2.0
-
         The following examples show how rank behaves with every different
-        method and setting pct = True
+        method and when setting ``pct=True``:
 
         >>> df['default_rank'] = df['Tot_Spend'].rank()
         >>> df['min_rank'] = df['Tot_Spend'].rank(method='min')
         >>> df['max_rank'] = df['Tot_Spend'].rank(method='max')
         >>> df['dense_rank'] = df['Tot_Spend'].rank(method='dense')
-        >>> df[['Tot_Spend','default_rank','min_rank','max_rank','dense_rank']]
+        >>> df[['Tot_Spend', 'default_rank', 'min_rank', 'max_rank',
+        ...     'dense_rank']]
            Tot_Spend  default_rank  min_rank  max_rank  dense_rank
         0         12           1.0       1.0       1.0         1.0
         1         20           4.5       4.0       5.0         4.0
         2         20           4.5       4.0       5.0         4.0
         3         18           3.0       3.0       3.0         3.0
         4         16           2.0       2.0       2.0         2.0
+
         >>> df['default_rank'] = df['Tot_Spend'].rank()
         >>> df['pct_rank'] = df['Tot_Spend'].rank(pct=True)
-        >>> df[['Tot_Spend','default_rank','pct_rank']]
+        >>> df[['Tot_Spend', 'default_rank', 'pct_rank']]
            Tot_Spend  default_rank  pct_rank
         0         12           1.0       0.2
         1         20           4.5       0.9
@@ -7800,8 +7795,8 @@ def rank(self, axis=0, method='average', numeric_only=None,
 
         The following example shows how rank behaves with NAs
 
-        >>> df = pd.DataFrame(data={'Student':['A','B','C','D','E'],
-        ...                         'Score':[78, np.nan, 68, 90, 68]})
+        >>> df = pd.DataFrame(data={'Student': ['A', 'B', 'C', 'D', 'E'],
+        ...                         'Score': [78, np.nan, 68, 90, 68]})
         >>> df['NA_keep'] = df['Score'].rank()
         >>> df['NA_min'] = df['Score'].rank(na_option='bottom')
         >>> df['NA_max'] = df['Score'].rank(na_option='top')
@@ -7812,10 +7807,6 @@ def rank(self, axis=0, method='average', numeric_only=None,
         2       C   68.0      1.5     1.5     2.5
         3       D   90.0      4.0     4.0     5.0
         4       E   68.0      1.5     1.5     2.5
-
-        See also
-        --------
-        GroupBy.rank : Rank of values within each group.
         """
         axis = self._get_axis_number(axis)
 

From 57f566ba29d545a009f0408040ef5befc02affad Mon Sep 17 00:00:00 2001
From: ed_abati <edoardo.abati@hotmail.com>
Date: Wed, 14 Nov 2018 23:55:02 +0000
Subject: [PATCH 4/4] Adjusted numeric_only description

---
 pandas/core/generic.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0187026702268..7efd5d4c1e03a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7938,8 +7938,7 @@ def rank(self, axis=0, method='average', numeric_only=None,
             * first: ranks assigned in order they appear in the array
             * dense: like 'min', but rank always increases by 1 between groups
         numeric_only : bool, optional
-            If True, rank only float, int and boolean data.
-            Valid only for DataFrame or Panel objects.
+            For DataFrame objects, rank only numeric columns if set to True.
         na_option : {'keep', 'top', 'bottom'}, default 'keep'
             How to rank NaN values: