From 11ba53510098618f4c2441e4df7f24b7b80164e3 Mon Sep 17 00:00:00 2001
From: Nick Crews <nicholas.b.crews@gmail.com>
Date: Sat, 3 Dec 2022 19:41:10 -0900
Subject: [PATCH 1/4] DOC: Improve groupby().ngroup() explanation for missing
 groups

---
 pandas/core/groupby/groupby.py | 37 ++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 659ca228bdcb0..d475aa31e7c4b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3212,6 +3212,9 @@ def ngroup(self, ascending: bool = True):
         would be seen when iterating over the groupby object, not the
         order they are first observed.
 
+        If a group would be excluded (due to null keys) then that
+        group is labeled as np.nan. See examples below.
+
         Parameters
         ----------
         ascending : bool, default True
@@ -3228,15 +3231,17 @@ def ngroup(self, ascending: bool = True):
 
         Examples
         --------
-        >>> df = pd.DataFrame({"A": list("aaabba")})
+        >>> df = pd.DataFrame()
+        >>> df["A"] = ["a", "a",  "a", "b", "b", "a"]
+        >>> df["B"] = ["a", None, "a", "b", "b", "a"]
         >>> df
-           A
-        0  a
-        1  a
-        2  a
-        3  b
-        4  b
-        5  a
+        A     B
+        0  a     a
+        1  a  None
+        2  a     a
+        3  b     b
+        4  b     b
+        5  a     a
         >>> df.groupby('A').ngroup()
         0    0
         1    0
@@ -3261,6 +3266,22 @@ def ngroup(self, ascending: bool = True):
         4    2
         5    0
         dtype: int64
+        >>> df.groupby("B").ngroup()
+        0    0.0
+        1    NaN
+        2    0.0
+        3    1.0
+        4    1.0
+        5    0.0
+        dtype: float64
+        >>> df.groupby("B", dropna=False).ngroup()
+        0    0
+        1    2
+        2    0
+        3    1
+        4    1
+        5    0
+        dtype: int64
         """
         with self._group_selection_context():
             index = self._selected_obj.index

From 9466d4d5747f034737dd2ffb546e97ec500abbad Mon Sep 17 00:00:00 2001
From: Nick Crews <nicholas.b.crews@gmail.com>
Date: Fri, 30 Dec 2022 15:32:59 -0900
Subject: [PATCH 2/4] DOC: fixup PR suggestions

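Address review comments from
https://github.com/pandas-dev/pandas/pull/50049:

- Reword the description of how groups with missing keys are labeled.
- Replace the two-column "A"/"B" example with a single "color" column.
- Drop the `ascending=False` and multi-key examples.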
---
 pandas/core/groupby/groupby.py | 50 ++++++++--------------------------
 1 file changed, 12 insertions(+), 38 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d475aa31e7c4b..82b83da519e1e 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3212,8 +3212,8 @@ def ngroup(self, ascending: bool = True):
         would be seen when iterating over the groupby object, not the
         order they are first observed.
 
-        If a group would be excluded (due to null keys) then that
-        group is labeled as np.nan. See examples below.
+        Groups with missing keys (where ``pd.isna()`` is True) are labeled ``NaN`` and
+        excluded from the count, unless the groupby was created with ``dropna=False``.
 
         Parameters
         ----------
@@ -3231,42 +3231,16 @@ def ngroup(self, ascending: bool = True):
 
         Examples
         --------
-        >>> df = pd.DataFrame()
-        >>> df["A"] = ["a", "a",  "a", "b", "b", "a"]
-        >>> df["B"] = ["a", None, "a", "b", "b", "a"]
+        >>> df = pd.DataFrame({"color": ["red", None, "red", "blue", "blue", "red"]})
         >>> df
-        A     B
-        0  a     a
-        1  a  None
-        2  a     a
-        3  b     b
-        4  b     b
-        5  a     a
-        >>> df.groupby('A').ngroup()
-        0    0
-        1    0
-        2    0
-        3    1
-        4    1
-        5    0
-        dtype: int64
-        >>> df.groupby('A').ngroup(ascending=False)
-        0    1
-        1    1
-        2    1
-        3    0
-        4    0
-        5    1
-        dtype: int64
-        >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup()
-        0    0
-        1    0
-        2    1
-        3    3
-        4    2
-        5    0
-        dtype: int64
-        >>> df.groupby("B").ngroup()
+           color
+        0    red
+        1   None
+        2    red
+        3   blue
+        4   blue
+        5    red
+        >>> df.groupby("color").ngroup()
         0    0.0
         1    NaN
         2    0.0
@@ -3274,7 +3248,7 @@ def ngroup(self, ascending: bool = True):
         4    1.0
         5    0.0
         dtype: float64
-        >>> df.groupby("B", dropna=False).ngroup()
+        >>> df.groupby("color", dropna=False).ngroup()
         0    0
         1    2
         2    0

From 66ae02eb6ebd754f87b779f635058e02aa155f88 Mon Sep 17 00:00:00 2001
From: Nick Crews <nicholas.b.crews@gmail.com>
Date: Sun, 1 Jan 2023 20:41:41 -0900
Subject: [PATCH 3/4] DOC: fixup: update order of labels

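The groups are numbered in lexicographically sorted key order. Since
the example now uses "red" and "blue" instead of "a" and "b", "blue"
sorts first, so the ngroup labels are swapped relative to the
previous expected output.

Because the labels follow the sorted key order rather than the order
of first appearance, the output should be deterministic and the
doctest should not be flaky.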
---
 pandas/core/groupby/groupby.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 82b83da519e1e..b2659a1d019b5 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3241,20 +3241,20 @@ def ngroup(self, ascending: bool = True):
         4   blue
         5    red
         >>> df.groupby("color").ngroup()
-        0    0.0
+        0    1.0
         1    NaN
-        2    0.0
-        3    1.0
-        4    1.0
-        5    0.0
+        2    1.0
+        3    0.0
+        4    0.0
+        5    1.0
         dtype: float64
         >>> df.groupby("color", dropna=False).ngroup()
-        0    0
+        0    1
         1    2
-        2    0
-        3    1
-        4    1
-        5    0
+        2    1
+        3    0
+        4    0
+        5    1
         dtype: int64
         """
         with self._group_selection_context():

From e5d0075b216c15c1cc4c508e78f8239ab3070569 Mon Sep 17 00:00:00 2001
From: Nick Crews <nicholas.b.crews@gmail.com>
Date: Mon, 2 Jan 2023 22:09:46 -0900
Subject: [PATCH 4/4] DOC: fixup: restore ascending=False example in docstring

I used `dropna=False` in this example to show where the NA group
lands: it is numbered last with the default `ascending=True`, so it
is numbered first (label 0) with `ascending=False`. I left out a
separate `dropna=True` example because its result follows directly
from this one and adding it would make the docstring too verbose.
---
 pandas/core/groupby/groupby.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index b2659a1d019b5..9a813e866e8d0 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3256,6 +3256,14 @@ def ngroup(self, ascending: bool = True):
         4    0
         5    1
         dtype: int64
+        >>> df.groupby("color", dropna=False).ngroup(ascending=False)
+        0    1
+        1    0
+        2    1
+        3    2
+        4    2
+        5    1
+        dtype: int64
         """
         with self._group_selection_context():
             index = self._selected_obj.index