Skip to content

Commit 91aa2f5

Browse files
meeseeksmachine authored and TomAugspurger committed
Backport PR #27827: BUG: Fixed groupby quantile for listlike q (#28085)
1 parent 74ce280 commit 91aa2f5

File tree

3 files changed

+104
-12
lines changed

3 files changed

+104
-12
lines changed

doc/source/whatsnew/v0.25.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ Plotting
8585
Groupby/resample/rolling
8686
^^^^^^^^^^^^^^^^^^^^^^^^
8787

88+
- Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given (:issue:`27526`)
8889
- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
8990
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
9091
- Bug in windowing over read-only arrays (:issue:`27766`)

pandas/core/groupby/groupby.py

+52-12
Original file line numberDiff line numberDiff line change
@@ -1872,6 +1872,7 @@ def quantile(self, q=0.5, interpolation="linear"):
18721872
a 2.0
18731873
b 3.0
18741874
"""
1875+
from pandas import concat
18751876

18761877
def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
18771878
if is_object_dtype(vals):
@@ -1899,18 +1900,57 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
18991900

19001901
return vals
19011902

1902-
return self._get_cythonized_result(
1903-
"group_quantile",
1904-
self.grouper,
1905-
aggregate=True,
1906-
needs_values=True,
1907-
needs_mask=True,
1908-
cython_dtype=np.float64,
1909-
pre_processing=pre_processor,
1910-
post_processing=post_processor,
1911-
q=q,
1912-
interpolation=interpolation,
1913-
)
1903+
if is_scalar(q):
1904+
return self._get_cythonized_result(
1905+
"group_quantile",
1906+
self.grouper,
1907+
aggregate=True,
1908+
needs_values=True,
1909+
needs_mask=True,
1910+
cython_dtype=np.float64,
1911+
pre_processing=pre_processor,
1912+
post_processing=post_processor,
1913+
q=q,
1914+
interpolation=interpolation,
1915+
)
1916+
else:
1917+
results = [
1918+
self._get_cythonized_result(
1919+
"group_quantile",
1920+
self.grouper,
1921+
aggregate=True,
1922+
needs_values=True,
1923+
needs_mask=True,
1924+
cython_dtype=np.float64,
1925+
pre_processing=pre_processor,
1926+
post_processing=post_processor,
1927+
q=qi,
1928+
interpolation=interpolation,
1929+
)
1930+
for qi in q
1931+
]
1932+
result = concat(results, axis=0, keys=q)
1933+
# fix levels to place quantiles on the inside
1934+
# TODO(GH-10710): Ideally, we could write this as
1935+
# >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :]
1936+
# but this hits https://github.com/pandas-dev/pandas/issues/10710
1937+
# which doesn't reorder the list-like `q` on the inner level.
1938+
order = np.roll(list(range(result.index.nlevels)), -1)
1939+
result = result.reorder_levels(order)
1940+
result = result.reindex(q, level=-1)
1941+
1942+
# fix order.
1943+
hi = len(q) * self.ngroups
1944+
arr = np.arange(0, hi, self.ngroups)
1945+
arrays = []
1946+
1947+
for i in range(self.ngroups):
1948+
arr = arr + i
1949+
arrays.append(arr)
1950+
1951+
indices = np.concatenate(arrays)
1952+
assert len(indices) == len(result)
1953+
return result.take(indices)
19141954

19151955
@Substitution(name="groupby")
19161956
def ngroup(self, ascending=True):

pandas/tests/groupby/test_function.py

+51
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,57 @@ def test_quantile(interpolation, a_vals, b_vals, q):
12381238
tm.assert_frame_equal(result, expected)
12391239

12401240

1241+
def test_quantile_array():
1242+
# https://github.com/pandas-dev/pandas/issues/27526
1243+
df = pd.DataFrame({"A": [0, 1, 2, 3, 4]})
1244+
result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25])
1245+
1246+
index = pd.MultiIndex.from_product([[0, 1], [0.25]])
1247+
expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index)
1248+
tm.assert_frame_equal(result, expected)
1249+
1250+
df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]})
1251+
index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]])
1252+
1253+
result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75])
1254+
expected = pd.DataFrame(
1255+
{"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index
1256+
)
1257+
tm.assert_frame_equal(result, expected)
1258+
1259+
1260+
def test_quantile_array_no_sort():
1261+
df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]})
1262+
result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75])
1263+
expected = pd.DataFrame(
1264+
{"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]},
1265+
index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]),
1266+
)
1267+
tm.assert_frame_equal(result, expected)
1268+
1269+
result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25])
1270+
expected = pd.DataFrame(
1271+
{"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]},
1272+
index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]),
1273+
)
1274+
tm.assert_frame_equal(result, expected)
1275+
1276+
1277+
def test_quantile_array_multiple_levels():
1278+
df = pd.DataFrame(
1279+
{"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]}
1280+
)
1281+
result = df.groupby(["c", "d"]).quantile([0.25, 0.75])
1282+
index = pd.MultiIndex.from_tuples(
1283+
[("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)],
1284+
names=["c", "d", None],
1285+
)
1286+
expected = pd.DataFrame(
1287+
{"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index
1288+
)
1289+
tm.assert_frame_equal(result, expected)
1290+
1291+
12411292
def test_quantile_raises():
12421293
df = pd.DataFrame(
12431294
[["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]

0 commit comments

Comments
 (0)