Skip to content

Commit 5fdf642

Browse files
authored
BUG: array-like quantile fails on column groupby (#38173)
1 parent 58fca97 commit 5fdf642

File tree

3 files changed

+39
-8
lines changed

3 files changed

+39
-8
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,7 @@ Groupby/resample/rolling
769769
- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
770770
- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
771771
- Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`)
772+
- Bug in :meth:`.DataFrameGroupBy.quantile` where an array-like ``q`` could not be handled when grouping by columns (:issue:`33795`)
772773
- Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`)
773774

774775
Reshaping

pandas/core/groupby/groupby.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -2232,29 +2232,36 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
22322232
)
22332233
for qi in q
22342234
]
2235-
result = concat(results, axis=0, keys=q)
2235+
result = concat(results, axis=self.axis, keys=q)
22362236
# fix levels to place quantiles on the inside
22372237
# TODO(GH-10710): Ideally, we could write this as
22382238
# >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :]
22392239
# but this hits https://github.com/pandas-dev/pandas/issues/10710
22402240
# which doesn't reorder the list-like `q` on the inner level.
2241-
order = list(range(1, result.index.nlevels)) + [0]
2241+
order = list(range(1, result.axes[self.axis].nlevels)) + [0]
22422242

22432243
# temporarily saves the index names
2244-
index_names = np.array(result.index.names)
2244+
index_names = np.array(result.axes[self.axis].names)
22452245

22462246
# set index names to positions to avoid confusion
2247-
result.index.names = np.arange(len(index_names))
2247+
result.axes[self.axis].names = np.arange(len(index_names))
22482248

22492249
# place quantiles on the inside
2250-
result = result.reorder_levels(order)
2250+
if isinstance(result, Series):
2251+
result = result.reorder_levels(order)
2252+
else:
2253+
result = result.reorder_levels(order, axis=self.axis)
22512254

22522255
# restore the index names in order
2253-
result.index.names = index_names[order]
2256+
result.axes[self.axis].names = index_names[order]
22542257

22552258
# reorder along the grouping axis (rows, or columns when axis=1) to keep things sorted
2256-
indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten()
2257-
return result.take(indices)
2259+
indices = (
2260+
np.arange(result.shape[self.axis])
2261+
.reshape([len(q), self.ngroups])
2262+
.T.flatten()
2263+
)
2264+
return result.take(indices, axis=self.axis)
22582265

22592266
@Substitution(name="groupby")
22602267
def ngroup(self, ascending: bool = True):

pandas/tests/groupby/test_quantile.py

+23
Original file line numberDiff line numberDiff line change
@@ -254,3 +254,26 @@ def test_groupby_timedelta_quantile():
254254
index=Index([1, 2], name="group"),
255255
)
256256
tm.assert_frame_equal(result, expected)
257+
258+
259+
def test_columns_groupby_quantile():
260+
# GH 33795
261+
df = DataFrame(
262+
np.arange(12).reshape(3, -1),
263+
index=list("XYZ"),
264+
columns=pd.Series(list("ABAB"), name="col"),
265+
)
266+
result = df.groupby("col", axis=1).quantile(q=[0.8, 0.2])
267+
expected = DataFrame(
268+
[
269+
[1.6, 0.4, 2.6, 1.4],
270+
[5.6, 4.4, 6.6, 5.4],
271+
[9.6, 8.4, 10.6, 9.4],
272+
],
273+
index=list("XYZ"),
274+
columns=Index(
275+
[("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None]
276+
),
277+
)
278+
279+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)