From 7778129e16c1b6979f95dd49b9bd0a5361107958 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Wed, 15 Apr 2020 13:47:29 -0500
Subject: [PATCH 1/6] BUG: Debug grouped quantile with NA values

---
 doc/source/whatsnew/v1.1.0.rst        |  1 +
 pandas/_libs/groupby.pyx              |  2 +-
 pandas/tests/groupby/test_function.py | 31 ++++++++++++++++++++++-----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 82c43811c0444..c1cd582be6b56 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -551,6 +551,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`)
 - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`)
 - Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`)
+- Bug in :meth:`DataFrameGroupBy.quantile` where incorrect values would be returned when missing group keys were present (:issue:`33569`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index e7ac3b8442c6d..2439c7f9c27af 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -779,7 +779,7 @@ def group_quantile(ndarray[float64_t] out,
                 non_na_counts[lab] += 1
 
     # Get an index of values sorted by labels and then values
-    order = (values, labels)
+    order = (values, np.where(labels == -1, labels.max() + 1, labels))
     sort_arr = np.lexsort(order).astype(np.int64, copy=False)
 
     with nogil:
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 346de55f551df..2a50262362da2 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1507,14 +1507,35 @@ def test_quantile_missing_group_values_no_segfaults():
         grp.quantile()
 
 
-def test_quantile_missing_group_values_correct_results():
+@pytest.mark.parametrize(
+    "key, value",
+    [
+        (["a"] * 4 + ["b"] * 4, np.array([1.0, 2.0, 3.0, np.nan] * 2)),
+        (["a"] * 4 + ["b"] * 3 + [np.nan], np.array([1.0, 2.0, 3.0, np.nan] * 2)),
+        (["a"] * 4 + [np.nan] + ["b"] * 3, np.array([np.nan, 1.0, 2.0, 3.0] * 2)),
+        (
+            ["a"] * 3 + [np.nan] + ["b"] * 3 + [np.nan],
+            np.array([1.0, 2.0, 3.0, np.nan] * 2),
+        ),
+        (
+            ["a"] * 3 + [np.nan] + ["b"] * 3 + [np.nan],
+            np.array([1.0, 2.0, 3.0, 4.0] * 2),
+        ),
+    ],
+)
+@pytest.mark.parametrize(
+    "quantile, expected_value", [(0.0, 1.0), (0.5, 2.0), (1.0, 3.0)]
+)
+def test_quantile_missing_group_values_correct_results(
+    key, value, quantile, expected_value
+):
     # GH 28662
-    data = np.array([1.0, np.nan, 3.0, np.nan])
-    df = pd.DataFrame(dict(key=data, val=range(4)))
+    # https://github.com/pandas-dev/pandas/issues/33569
+    df = pd.DataFrame({"key": key, "value": value})
 
-    result = df.groupby("key").quantile()
+    result = df.groupby("key").quantile(quantile)
     expected = pd.DataFrame(
-        [1.0, 3.0], index=pd.Index([1.0, 3.0], name="key"), columns=["val"]
+        [expected_value] * 2, index=pd.Index(["a", "b"], name="key"), columns=["value"]
     )
     tm.assert_frame_equal(result, expected)
 

From 1297438895af5f35ee14647ab172c8301d41494a Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Wed, 15 Apr 2020 16:55:10 -0500
Subject: [PATCH 2/6] Comment

---
 pandas/_libs/groupby.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 2439c7f9c27af..25fde87690310 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -778,7 +778,8 @@ def group_quantile(ndarray[float64_t] out,
             if not mask[i]:
                 non_na_counts[lab] += 1
 
-    # Get an index of values sorted by labels and then values
+    # Get an index of values sorted by labels and then values,
+    # make sure missing labels sort to the back of the array
     order = (values, np.where(labels == -1, labels.max() + 1, labels))
     sort_arr = np.lexsort(order).astype(np.int64, copy=False)
 

From db39b0a2065aa0b324b290782fb118f2a5d67127 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Wed, 15 Apr 2020 17:21:41 -0500
Subject: [PATCH 3/6] Fixup

---
 pandas/_libs/groupby.pyx | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 25fde87690310..9ea780e0b9259 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -780,7 +780,11 @@ def group_quantile(ndarray[float64_t] out,
 
     # Get an index of values sorted by labels and then values,
     # make sure missing labels sort to the back of the array
-    order = (values, np.where(labels == -1, labels.max() + 1, labels))
+    if labels.size:
+        labels_for_lexsort = np.where(labels == -1, labels.max() + 1, labels)
+    else:
+        labels_for_lexsort = labels
+    order = (values, labels_for_lexsort)
     sort_arr = np.lexsort(order).astype(np.int64, copy=False)
 
     with nogil:

From f10e9dd84954536d4ebf559a8884c5922bf29441 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Thu, 16 Apr 2020 20:01:45 -0500
Subject: [PATCH 4/6] Better test

---
 pandas/tests/groupby/test_function.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 2a50262362da2..40702f5e88577 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1508,31 +1508,23 @@ def test_quantile_missing_group_values_no_segfaults():
 
 
 @pytest.mark.parametrize(
-    "key, value",
+    "key",
     [
-        (["a"] * 4 + ["b"] * 4, np.array([1.0, 2.0, 3.0, np.nan] * 2)),
-        (["a"] * 4 + ["b"] * 3 + [np.nan], np.array([1.0, 2.0, 3.0, np.nan] * 2)),
-        (["a"] * 4 + [np.nan] + ["b"] * 3, np.array([np.nan, 1.0, 2.0, 3.0] * 2)),
-        (
-            ["a"] * 3 + [np.nan] + ["b"] * 3 + [np.nan],
-            np.array([1.0, 2.0, 3.0, np.nan] * 2),
-        ),
-        (
-            ["a"] * 3 + [np.nan] + ["b"] * 3 + [np.nan],
-            np.array([1.0, 2.0, 3.0, 4.0] * 2),
-        ),
+        ["a"] * 4 + ["b"] * 3 + [np.nan],
+        ["a"] * 3 + [np.nan] + ["b"] * 4,
+        ["a"] * 3 + [np.nan] + ["b"] * 3 + [np.nan],
     ],
 )
 @pytest.mark.parametrize(
     "quantile, expected_value", [(0.0, 1.0), (0.5, 2.0), (1.0, 3.0)]
 )
 def test_quantile_missing_group_values_correct_results(
-    key, value, quantile, expected_value
+    key, quantile, expected_value
 ):
     # GH 28662
     # https://github.com/pandas-dev/pandas/issues/33569
+    value = np.array([1.0, 2.0, 3.0, np.nan] * 2)
     df = pd.DataFrame({"key": key, "value": value})
-
     result = df.groupby("key").quantile(quantile)
     expected = pd.DataFrame(
         [expected_value] * 2, index=pd.Index(["a", "b"], name="key"), columns=["value"]

From 0257f86adbe3189e25731dd1b5f664ab8cdb54b9 Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Thu, 16 Apr 2020 20:08:09 -0500
Subject: [PATCH 5/6] Remove

---
 pandas/_libs/groupby.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 9ea780e0b9259..5e464cfd70539 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -784,8 +784,8 @@ def group_quantile(ndarray[float64_t] out,
         labels_for_lexsort = np.where(labels == -1, labels.max() + 1, labels)
     else:
         labels_for_lexsort = labels
-    order = (values, labels_for_lexsort)
-    sort_arr = np.lexsort(order).astype(np.int64, copy=False)
+
+    sort_arr = np.lexsort((values, labels_for_lexsort)).astype(np.int64, copy=False)
 
     with nogil:
         for i in range(ngroups):

From e921701043b9d6a939d03ed578733a3bff05e66f Mon Sep 17 00:00:00 2001
From: Daniel Saxton <dsaxton@pm.me>
Date: Thu, 16 Apr 2020 20:50:07 -0500
Subject: [PATCH 6/6] Black

---
 pandas/tests/groupby/test_function.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 40702f5e88577..1bc236f459992 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1518,9 +1518,7 @@ def test_quantile_missing_group_values_no_segfaults():
 @pytest.mark.parametrize(
     "quantile, expected_value", [(0.0, 1.0), (0.5, 2.0), (1.0, 3.0)]
 )
-def test_quantile_missing_group_values_correct_results(
-    key, quantile, expected_value
-):
+def test_quantile_missing_group_values_correct_results(key, quantile, expected_value):
     # GH 28662
     # https://github.com/pandas-dev/pandas/issues/33569
     value = np.array([1.0, 2.0, 3.0, np.nan] * 2)