diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 84ac2d0c17676..5c7ccd256a84c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -769,6 +769,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) - Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`) +- Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle array-like ``q`` when grouping by columns (:issue:`33795`) - Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`) Reshaping diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 184fa3a2b4204..ba4d7dd063c38 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2232,29 +2232,36 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: ) for qi in q ] - result = concat(results, axis=0, keys=q) + result = concat(results, axis=self.axis, keys=q) # fix levels to place quantiles on the inside # TODO(GH-10710): Ideally, we could write this as # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] # but this hits https://github.com/pandas-dev/pandas/issues/10710 # which doesn't reorder the list-like `q` on the inner level. 
- order = list(range(1, result.index.nlevels)) + [0] + order = list(range(1, result.axes[self.axis].nlevels)) + [0] # temporarily saves the index names - index_names = np.array(result.index.names) + index_names = np.array(result.axes[self.axis].names) # set index names to positions to avoid confusion - result.index.names = np.arange(len(index_names)) + result.axes[self.axis].names = np.arange(len(index_names)) # place quantiles on the inside - result = result.reorder_levels(order) + if isinstance(result, Series): + result = result.reorder_levels(order) + else: + result = result.reorder_levels(order, axis=self.axis) # restore the index names in order - result.index.names = index_names[order] + result.axes[self.axis].names = index_names[order] # reorder rows to keep things sorted - indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() - return result.take(indices) + indices = ( + np.arange(result.shape[self.axis]) + .reshape([len(q), self.ngroups]) + .T.flatten() + ) + return result.take(indices, axis=self.axis) @Substitution(name="groupby") def ngroup(self, ascending: bool = True): diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 53729a120ae8d..76fc82c6288eb 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -254,3 +254,26 @@ def test_groupby_timedelta_quantile(): index=Index([1, 2], name="group"), ) tm.assert_frame_equal(result, expected) + + +def test_columns_groupby_quantile(): + # GH 33795 + df = DataFrame( + np.arange(12).reshape(3, -1), + index=list("XYZ"), + columns=pd.Series(list("ABAB"), name="col"), + ) + result = df.groupby("col", axis=1).quantile(q=[0.8, 0.2]) + expected = DataFrame( + [ + [1.6, 0.4, 2.6, 1.4], + [5.6, 4.4, 6.6, 5.4], + [9.6, 8.4, 10.6, 9.4], + ], + index=list("XYZ"), + columns=Index( + [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None] + ), + ) + + tm.assert_frame_equal(result, expected)