From 372198783e3d3d7645d75c5ec23ce23c0347bcbf Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <koustav201197@gmail.com>
Date: Thu, 22 Nov 2018 18:10:37 -0500
Subject: [PATCH 01/11] BUG: Fixed groupby.rank w/ method="dense", pct=True if
 any grp count is 1

For some reason the grp_size is stored as 0 instead of 1 in this example
causing the error.
---
 pandas/_libs/groupby_helper.pxi.in | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 523d43f893aad..ffab9b69e1047 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -588,7 +588,16 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
                 if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
                     out[i, 0] = NAN
                 else:
-                    out[i, 0] = out[i, 0] / grp_sizes[i, 0]
+                    with gil:
+                        try:
+                            out[i, 0] = out[i, 0] / grp_sizes[i, 0]
+                        except ZeroDivisionError:
+                            # BUG 23666 - tie-breaking for dense causes the
+                            # group sizes for groups of count 1 to become 0
+                            # causing a divide by zero error. The correct
+                            # behaviour should be divide by 1 a.k.a no change
+                            # to out[i, 0]
+                            pass
 {{endif}}
 {{endfor}}
 

From a1ba61ee208d7f3110e4cd048591f70685a7ac70 Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <koustav201197@gmail.com>
Date: Thu, 22 Nov 2018 20:18:33 -0500
Subject: [PATCH 02/11] BUG: Fixed groupby.rank w/ method="dense", pct=True

Added change to the what's new file.
---
 doc/source/whatsnew/v0.24.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 4ff3cc728f7f7..6777dcc448355 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1420,6 +1420,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`)
 - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` which caused missing values when the input function can accept a :class:`DataFrame` but renames it (:issue:`23455`).
 - Bug in :func:`pandas.core.groupby.GroupBy.nth` where column order was not always preserved (:issue:`20760`)
+- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ZeroDivisionError (:issue:`23666`).
 
 Reshaping
 ^^^^^^^^^

From b8d343cea6c35961b8833da5cfd64a636a77869b Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <koustav201197@gmail.com>
Date: Thu, 22 Nov 2018 21:17:53 -0500
Subject: [PATCH 03/11] BUG 23666

Moved my test case from local file to approporiate test file in the repo.
---
 pandas/tests/groupby/test_rank.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index e7e91572c56d1..820e2f37d6f3d 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -290,3 +290,16 @@ def test_rank_empty_group():
     result = df.groupby(column).rank(pct=True)
     expected = DataFrame({"B": [0.5, np.nan, 1.0]})
     tm.assert_frame_equal(result, expected)
+
+
+def test_rank_zero_div():
+    # GH 23666
+    df = pd.DataFrame({
+                   "A": [1, 1, 1, 2, 2, 2],
+                   "B": [1, 1, 1, 1, 2, 2],
+                   "C": [1, 2, 1, 1, 1, 2]})
+
+    try:
+        df.groupby(["A", "B"])["C"].rank(pct=True, method="dense")
+    except ZeroDivisionError:
+        pytest.fail("Unexpected zero division error with groupby rank")

From 1be7457925ccecfcc129041d3c6339b22111516b Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <koustav201197@gmail.com>
Date: Thu, 22 Nov 2018 21:41:01 -0500
Subject: [PATCH 04/11] Fixed pep8 in the tests file

---
 pandas/tests/groupby/test_rank.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 820e2f37d6f3d..b83fc423b1b96 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -295,9 +295,10 @@ def test_rank_empty_group():
 def test_rank_zero_div():
     # GH 23666
     df = pd.DataFrame({
-                   "A": [1, 1, 1, 2, 2, 2],
-                   "B": [1, 1, 1, 1, 2, 2],
-                   "C": [1, 2, 1, 1, 1, 2]})
+        "A": [1, 1, 1, 2, 2, 2],
+        "B": [1, 1, 1, 1, 2, 2],
+        "C": [1, 2, 1, 1, 1, 2]
+    })
 
     try:
         df.groupby(["A", "B"])["C"].rank(pct=True, method="dense")

From f1db09c1ab899474a1439a9a20d0c371e4b16671 Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <koustav201197@gmail.com>
Date: Thu, 22 Nov 2018 22:22:13 -0500
Subject: [PATCH 05/11] Made the requested modifications

---
 doc/source/whatsnew/v0.24.0.rst    |  2 +-
 pandas/_libs/groupby_helper.pxi.in | 17 +++++++----------
 pandas/tests/groupby/test_rank.py  | 10 +++-------
 3 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 6777dcc448355..d36ddc075b91e 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1420,7 +1420,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`)
 - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` which caused missing values when the input function can accept a :class:`DataFrame` but renames it (:issue:`23455`).
 - Bug in :func:`pandas.core.groupby.GroupBy.nth` where column order was not always preserved (:issue:`20760`)
-- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ZeroDivisionError (:issue:`23666`).
+- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`).
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index ffab9b69e1047..0b7e2035787c7 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -588,16 +588,13 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
                 if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
                     out[i, 0] = NAN
                 else:
-                    with gil:
-                        try:
-                            out[i, 0] = out[i, 0] / grp_sizes[i, 0]
-                        except ZeroDivisionError:
-                            # BUG 23666 - tie-breaking for dense causes the
-                            # group sizes for groups of count 1 to become 0
-                            # causing a divide by zero error. The correct
-                            # behaviour should be divide by 1 a.k.a no change
-                            # to out[i, 0]
-                            pass
+                    if grp_sizes[i, 0] != 0:
+                        # BUG 23666 - tie-breaking for dense causes the
+                        # group sizes for groups of count 1 to become 0
+                        # causing a divide by zero error. The correct
+                        # behaviour should be divide by 1 a.k.a no change
+                        # to out[i, 0]
+                        out[i, 0] = out[i, 0] / grp_sizes[i, 0]
 {{endif}}
 {{endfor}}
 
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index b83fc423b1b96..5328edf081b28 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -295,12 +295,8 @@ def test_rank_empty_group():
 def test_rank_zero_div():
     # GH 23666
     df = pd.DataFrame({
-        "A": [1, 1, 1, 2, 2, 2],
-        "B": [1, 1, 1, 1, 2, 2],
-        "C": [1, 2, 1, 1, 1, 2]
+        "A": [1, 2],
+        "B": [1, 1]
     })
 
-    try:
-        df.groupby(["A", "B"])["C"].rank(pct=True, method="dense")
-    except ZeroDivisionError:
-        pytest.fail("Unexpected zero division error with groupby rank")
+    df.groupby("A")["B"].rank(pct=True, method="dense")

From 7ce206cd8e224ae57b14fe92ee86da94c32a0cd6 Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <koustav201197@gmail.com>
Date: Fri, 23 Nov 2018 11:16:41 -0500
Subject: [PATCH 06/11] Updated test case to assert expected result

---
 pandas/tests/groupby/test_rank.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 5328edf081b28..d6285679ccb50 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -294,9 +294,11 @@ def test_rank_empty_group():
 
 def test_rank_zero_div():
     # GH 23666
-    df = pd.DataFrame({
+    df = DataFrame({
         "A": [1, 2],
         "B": [1, 1]
     })
 
-    df.groupby("A")["B"].rank(pct=True, method="dense")
+    result = df.groupby("A").rank(pct=True, method="dense")
+    expected = DataFrame({"B": [1.0, 1.0]})
+    tm.assert_frame_equal(result, expected)
\ No newline at end of file

From 976ecccec26369b615c6670ce55dbf3d473f368c Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <koustav201197@gmail.com>
Date: Fri, 23 Nov 2018 13:06:32 -0500
Subject: [PATCH 07/11] Moved & shrunk comment block outside if block

---
 pandas/_libs/groupby_helper.pxi.in | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 0b7e2035787c7..98008ebab0ae9 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -588,12 +588,9 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
                 if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
                     out[i, 0] = NAN
                 else:
+                    # BUG 23666 - grp_sizes[i, 0] should never be 0
+                    # If it's zero, it means to be 1 a.k.a. no change
                     if grp_sizes[i, 0] != 0:
-                        # BUG 23666 - tie-breaking for dense causes the
-                        # group sizes for groups of count 1 to become 0
-                        # causing a divide by zero error. The correct
-                        # behaviour should be divide by 1 a.k.a no change
-                        # to out[i, 0]
                         out[i, 0] = out[i, 0] / grp_sizes[i, 0]
 {{endif}}
 {{endfor}}

From 87ee207470cbf277ccb272d9952d14307a0b55c9 Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <ksamadda@purdue.edu>
Date: Sat, 24 Nov 2018 16:20:04 -0500
Subject: [PATCH 08/11] Fixed pep8 formatting to have empty line at end of file

---
 pandas/tests/groupby/test_rank.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index d6285679ccb50..868166815d62b 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -301,4 +301,4 @@ def test_rank_zero_div():
 
     result = df.groupby("A").rank(pct=True, method="dense")
     expected = DataFrame({"B": [1.0, 1.0]})
-    tm.assert_frame_equal(result, expected)
\ No newline at end of file
+    tm.assert_frame_equal(result, expected)

From 34047dac5c7eae2c252cde120ba8fa14aa3324c4 Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <ksamadda@purdue.edu>
Date: Sat, 24 Nov 2018 16:22:10 -0500
Subject: [PATCH 09/11] Updated to have more test cases for better coverage

---
 pandas/tests/groupby/test_rank.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 868166815d62b..3caf48dc19461 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -292,13 +292,16 @@ def test_rank_empty_group():
     tm.assert_frame_equal(result, expected)
 
 
-def test_rank_zero_div():
+@pytest.mark.parametrize("df_dicts", [
+    ({"A": [1, 2], "B": [1, 1]}, {"B": [1.0, 1.0]}),
+    ({"A": [1, 1, 2, 2], "B": [1, 2, 1, 2]}, {"B": [0.5, 1.0, 0.5, 1.0]}),
+    ({"A": [1, 1, 2, 2], "B": [1, 2, 1, np.nan]}, {"B": [0.5, 1.0, 1.0, np.nan]}),
+    ({"A": [1, 1, 2], "B": [1, 2, np.nan]}, {"B": [0.5, 1.0, np.nan]})
+])
+def test_rank_zero_div(df_dicts):
     # GH 23666
-    df = DataFrame({
-        "A": [1, 2],
-        "B": [1, 1]
-    })
+    df = DataFrame(df_dicts[0])
 
-    result = df.groupby("A").rank(pct=True, method="dense")
-    expected = DataFrame({"B": [1.0, 1.0]})
+    result = df.groupby("A").rank(method="dense", pct=True)
+    expected = DataFrame(df_dicts[1])
     tm.assert_frame_equal(result, expected)

From 88f5cb46af13af7aeb1c9143196491d41e467b83 Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <ksamadda@purdue.edu>
Date: Sat, 1 Dec 2018 15:51:22 -0500
Subject: [PATCH 10/11] Made stylistic changes to code and test according to
 request

---
 pandas/_libs/groupby_helper.pxi.in |  9 ++++-----
 pandas/tests/groupby/test_rank.py  | 16 ++++++++--------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 98008ebab0ae9..20c1a2646cab1 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -587,11 +587,10 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
                 # rankings, so we assign them percentages of NaN.
                 if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
                     out[i, 0] = NAN
-                else:
-                    # BUG 23666 - grp_sizes[i, 0] should never be 0
-                    # If it's zero, it means to be 1 a.k.a. no change
-                    if grp_sizes[i, 0] != 0:
-                        out[i, 0] = out[i, 0] / grp_sizes[i, 0]
+                # BUG 23666 - grp_sizes[i, 0] should never be 0
+                # If it's zero, it means to be 1 a.k.a. no change
+                elif grp_sizes[i, 0] != 0:
+                    out[i, 0] = out[i, 0] / grp_sizes[i, 0]
 {{endif}}
 {{endfor}}
 
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 3caf48dc19461..aaac614761083 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -292,16 +292,16 @@ def test_rank_empty_group():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize("df_dicts", [
-    ({"A": [1, 2], "B": [1, 1]}, {"B": [1.0, 1.0]}),
-    ({"A": [1, 1, 2, 2], "B": [1, 2, 1, 2]}, {"B": [0.5, 1.0, 0.5, 1.0]}),
-    ({"A": [1, 1, 2, 2], "B": [1, 2, 1, np.nan]}, {"B": [0.5, 1.0, 1.0, np.nan]}),
-    ({"A": [1, 1, 2], "B": [1, 2, np.nan]}, {"B": [0.5, 1.0, np.nan]})
+@pytest.mark.parametrize("input_key,input_value,output_value", [
+    ([1, 2], [1, 1], [1.0, 1.0]),
+    ([1, 1, 2, 2], [1, 2, 1, 2], [0.5, 1.0, 0.5, 1.0]),
+    ([1, 1, 2, 2], [1, 2, 1, np.nan], [0.5, 1.0, 1.0, np.nan]),
+    ([1, 1, 2], [1, 2, np.nan], [0.5, 1.0, np.nan])
 ])
-def test_rank_zero_div(df_dicts):
+def test_rank_zero_div(input_key, input_value, output_value):
     # GH 23666
-    df = DataFrame(df_dicts[0])
+    df = DataFrame({"A": input_key, "B": input_value})
 
     result = df.groupby("A").rank(method="dense", pct=True)
-    expected = DataFrame(df_dicts[1])
+    expected = DataFrame({"B": output_value})
     tm.assert_frame_equal(result, expected)

From 74f9e0b9283700e546fc8899b8329a8bc88e981c Mon Sep 17 00:00:00 2001
From: Koustav Samaddar <ksamadda@purdue.edu>
Date: Sat, 1 Dec 2018 15:53:48 -0500
Subject: [PATCH 11/11] Removed comment describing bug

---
 pandas/_libs/groupby_helper.pxi.in | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
index 20c1a2646cab1..abac9f147848e 100644
--- a/pandas/_libs/groupby_helper.pxi.in
+++ b/pandas/_libs/groupby_helper.pxi.in
@@ -587,8 +587,6 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
                 # rankings, so we assign them percentages of NaN.
                 if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
                     out[i, 0] = NAN
-                # BUG 23666 - grp_sizes[i, 0] should never be 0
-                # If it's zero, it means to be 1 a.k.a. no change
                 elif grp_sizes[i, 0] != 0:
                     out[i, 0] = out[i, 0] / grp_sizes[i, 0]
 {{endif}}