Backport PR #27827: BUG: Fixed groupby quantile for listlike q (#28085)

meeseeksmachine · TomAugspurger · commit 91aa2f58c37f · 2019-08-22T07:17:28.000-05:00
diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
@@ -85,6 +85,7 @@ Plotting
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
+- Fixed regression in :meth:`pands.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given (:issue:`27526`)
 - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
 - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
 - Bug in windowing over read-only arrays (:issue:`27766`)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1872,6 +1872,7 @@ def quantile(self, q=0.5, interpolation="linear"):
         a    2.0
         b    3.0
         """
+        from pandas import concat
 
         def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
             if is_object_dtype(vals):
@@ -1899,18 +1900,57 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
 
             return vals
 
-        return self._get_cythonized_result(
-            "group_quantile",
-            self.grouper,
-            aggregate=True,
-            needs_values=True,
-            needs_mask=True,
-            cython_dtype=np.float64,
-            pre_processing=pre_processor,
-            post_processing=post_processor,
-            q=q,
-            interpolation=interpolation,
-        )
+        if is_scalar(q):
+            return self._get_cythonized_result(
+                "group_quantile",
+                self.grouper,
+                aggregate=True,
+                needs_values=True,
+                needs_mask=True,
+                cython_dtype=np.float64,
+                pre_processing=pre_processor,
+                post_processing=post_processor,
+                q=q,
+                interpolation=interpolation,
+            )
+        else:
+            results = [
+                self._get_cythonized_result(
+                    "group_quantile",
+                    self.grouper,
+                    aggregate=True,
+                    needs_values=True,
+                    needs_mask=True,
+                    cython_dtype=np.float64,
+                    pre_processing=pre_processor,
+                    post_processing=post_processor,
+                    q=qi,
+                    interpolation=interpolation,
+                )
+                for qi in q
+            ]
+            result = concat(results, axis=0, keys=q)
+            # fix levels to place quantiles on the inside
+            # TODO(GH-10710): Ideally, we could write this as
+            #  >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :]
+            #  but this hits https://github.com/pandas-dev/pandas/issues/10710
+            #  which doesn't reorder the list-like `q` on the inner level.
+            order = np.roll(list(range(result.index.nlevels)), -1)
+            result = result.reorder_levels(order)
+            result = result.reindex(q, level=-1)
+
+            # fix order.
+            hi = len(q) * self.ngroups
+            arr = np.arange(0, hi, self.ngroups)
+            arrays = []
+
+            for i in range(self.ngroups):
+                arr = arr + i
+                arrays.append(arr)
+
+            indices = np.concatenate(arrays)
+            assert len(indices) == len(result)
+            return result.take(indices)
 
     @Substitution(name="groupby")
     def ngroup(self, ascending=True):
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -1238,6 +1238,57 @@ def test_quantile(interpolation, a_vals, b_vals, q):
     tm.assert_frame_equal(result, expected)
 
 
+def test_quantile_array():
+    # https://github.com/pandas-dev/pandas/issues/27526
+    df = pd.DataFrame({"A": [0, 1, 2, 3, 4]})
+    result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25])
+
+    index = pd.MultiIndex.from_product([[0, 1], [0.25]])
+    expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index)
+    tm.assert_frame_equal(result, expected)
+
+    df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]})
+    index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]])
+
+    result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75])
+    expected = pd.DataFrame(
+        {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array_no_sort():
+    df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]})
+    result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75])
+    expected = pd.DataFrame(
+        {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]},
+        index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+    result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25])
+    expected = pd.DataFrame(
+        {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]},
+        index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_quantile_array_multiple_levels():
+    df = pd.DataFrame(
+        {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]}
+    )
+    result = df.groupby(["c", "d"]).quantile([0.25, 0.75])
+    index = pd.MultiIndex.from_tuples(
+        [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)],
+        names=["c", "d", None],
+    )
+    expected = pd.DataFrame(
+        {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 def test_quantile_raises():
     df = pd.DataFrame(
         [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]