pandas-dev · dsaxton · Apr 15, 2020 · Apr 15, 2020 · Apr 15, 2020 · Apr 17, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -598,6 +598,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`)
 - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`)
 - Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`)
+- Bug in :meth:`DataFrameGroupBy.quantile` where incorrect values would be returned when the grouping keys contained missing values (:issue:`33569`)
 - Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`)
 
 Reshaping

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -778,9 +778,14 @@ def group_quantile(ndarray[float64_t] out,
             if not mask[i]:
                 non_na_counts[lab] += 1
 
-    # Get an index of values sorted by labels and then values
-    order = (values, labels)
-    sort_arr = np.lexsort(order).astype(np.int64, copy=False)
+    # Get an index of values sorted by labels and then values;
+    # missing labels (coded as -1) are remapped to sort to the back of the array
+    if labels.size:
+        labels_for_lexsort = np.where(labels == -1, labels.max() + 1, labels)
+    else:
+        labels_for_lexsort = labels
+
+    sort_arr = np.lexsort((values, labels_for_lexsort)).astype(np.int64, copy=False)
 
     with nogil:
         for i in range(ngroups):

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -1507,14 +1507,25 @@ def test_quantile_missing_group_values_no_segfaults():
         grp.quantile()
 
 
-def test_quantile_missing_group_values_correct_results():
+@pytest.mark.parametrize(
+    "key",
+    [
+        ["a"] * 4 + ["b"] * 3 + [np.nan],
+        ["a"] * 3 + [np.nan] + ["b"] * 4,
+        ["a"] * 3 + [np.nan] + ["b"] * 3 + [np.nan],
+    ],
+)
+@pytest.mark.parametrize(
+    "quantile, expected_value", [(0.0, 1.0), (0.5, 2.0), (1.0, 3.0)]
+)
+def test_quantile_missing_group_values_correct_results(key, quantile, expected_value):
     # GH 28662
-    data = np.array([1.0, np.nan, 3.0, np.nan])
-    df = pd.DataFrame(dict(key=data, val=range(4)))
-
-    result = df.groupby("key").quantile()
+    # https://github.com/pandas-dev/pandas/issues/33569
+    value = np.array([1.0, 2.0, 3.0, np.nan] * 2)
+    df = pd.DataFrame({"key": key, "value": value})
+    result = df.groupby("key").quantile(quantile)
     expected = pd.DataFrame(
-        [1.0, 3.0], index=pd.Index([1.0, 3.0], name="key"), columns=["val"]
+        [expected_value] * 2, index=pd.Index(["a", "b"], name="key"), columns=["value"]
     )
     tm.assert_frame_equal(result, expected)