diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 94770a7aae676..4d385ef0ec166 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -117,6 +117,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ +- Fixed regression in :meth:`pands.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given (:issue:`27526`) - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) - Bug in windowing over read-only arrays (:issue:`27766`) - Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c5e81e21e9fd5..35c2dac3901a2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1870,6 +1870,7 @@ def quantile(self, q=0.5, interpolation="linear"): a 2.0 b 3.0 """ + from pandas import concat def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: if is_object_dtype(vals): @@ -1897,18 +1898,57 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: return vals - return self._get_cythonized_result( - "group_quantile", - self.grouper, - aggregate=True, - needs_values=True, - needs_mask=True, - cython_dtype=np.float64, - pre_processing=pre_processor, - post_processing=post_processor, - q=q, - interpolation=interpolation, - ) + if is_scalar(q): + return self._get_cythonized_result( + "group_quantile", + self.grouper, + aggregate=True, + needs_values=True, + needs_mask=True, + cython_dtype=np.float64, + pre_processing=pre_processor, + post_processing=post_processor, + q=q, + interpolation=interpolation, + ) + else: + results = [ + self._get_cythonized_result( + "group_quantile", + self.grouper, + aggregate=True, + needs_values=True, + needs_mask=True, + cython_dtype=np.float64, + pre_processing=pre_processor, + post_processing=post_processor, + q=qi, + interpolation=interpolation, + ) + for qi in q + ] + result = concat(results, axis=0, keys=q) + # fix levels to place quantiles on the inside + # TODO(GH-10710): Ideally, we could write this as + # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] + # but this hits https://github.com/pandas-dev/pandas/issues/10710 + # which doesn't reorder the list-like `q` on the inner level. + order = np.roll(list(range(result.index.nlevels)), -1) + result = result.reorder_levels(order) + result = result.reindex(q, level=-1) + + # fix order. + hi = len(q) * self.ngroups + arr = np.arange(0, hi, self.ngroups) + arrays = [] + + for i in range(self.ngroups): + arr = arr + i + arrays.append(arr) + + indices = np.concatenate(arrays) + assert len(indices) == len(result) + return result.take(indices) @Substitution(name="groupby") def ngroup(self, ascending=True): diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 3794120281e1f..509d7c33b643b 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1238,6 +1238,57 @@ def test_quantile(interpolation, a_vals, b_vals, q): tm.assert_frame_equal(result, expected) +def test_quantile_array(): + # https://github.com/pandas-dev/pandas/issues/27526 + df = pd.DataFrame({"A": [0, 1, 2, 3, 4]}) + result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25]) + + index = pd.MultiIndex.from_product([[0, 1], [0.25]]) + expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]}) + index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]]) + + result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75]) + expected = pd.DataFrame( + {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_no_sort(): + df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) + result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75]) + expected = pd.DataFrame( + {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]), + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25]) + expected = pd.DataFrame( + {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_multiple_levels(): + df = pd.DataFrame( + {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]} + ) + result = df.groupby(["c", "d"]).quantile([0.25, 0.75]) + index = pd.MultiIndex.from_tuples( + [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)], + names=["c", "d", None], + ) + expected = pd.DataFrame( + {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_raises(): df = pd.DataFrame( [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]