Skip to content

Commit 8f6118c

Browse files
BUG: Fixed groupby quantile for listlike q (#27827)
* BUG: Fixed groupby quantile for listlike q Closes #27526
1 parent bdcab11 commit 8f6118c

File tree

3 files changed

+104
-12
lines changed

3 files changed

+104
-12
lines changed

doc/source/whatsnew/v0.25.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ Plotting
8585
Groupby/resample/rolling
8686
^^^^^^^^^^^^^^^^^^^^^^^^
8787

88+
- Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given (:issue:`27526`)
8889
- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
8990
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
9091
- Bug in windowing over read-only arrays (:issue:`27766`)

pandas/core/groupby/groupby.py

+52-12
Original file line numberDiff line numberDiff line change
@@ -1874,6 +1874,7 @@ def quantile(self, q=0.5, interpolation="linear"):
18741874
a 2.0
18751875
b 3.0
18761876
"""
1877+
from pandas import concat
18771878

18781879
def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
18791880
if is_object_dtype(vals):
@@ -1901,18 +1902,57 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
19011902

19021903
return vals
19031904

1904-
return self._get_cythonized_result(
1905-
"group_quantile",
1906-
self.grouper,
1907-
aggregate=True,
1908-
needs_values=True,
1909-
needs_mask=True,
1910-
cython_dtype=np.float64,
1911-
pre_processing=pre_processor,
1912-
post_processing=post_processor,
1913-
q=q,
1914-
interpolation=interpolation,
1915-
)
1905+
if is_scalar(q):
1906+
return self._get_cythonized_result(
1907+
"group_quantile",
1908+
self.grouper,
1909+
aggregate=True,
1910+
needs_values=True,
1911+
needs_mask=True,
1912+
cython_dtype=np.float64,
1913+
pre_processing=pre_processor,
1914+
post_processing=post_processor,
1915+
q=q,
1916+
interpolation=interpolation,
1917+
)
1918+
else:
1919+
results = [
1920+
self._get_cythonized_result(
1921+
"group_quantile",
1922+
self.grouper,
1923+
aggregate=True,
1924+
needs_values=True,
1925+
needs_mask=True,
1926+
cython_dtype=np.float64,
1927+
pre_processing=pre_processor,
1928+
post_processing=post_processor,
1929+
q=qi,
1930+
interpolation=interpolation,
1931+
)
1932+
for qi in q
1933+
]
1934+
result = concat(results, axis=0, keys=q)
1935+
# fix levels to place quantiles on the inside
1936+
# TODO(GH-10710): Ideally, we could write this as
1937+
# >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :]
1938+
# but this hits https://github.com/pandas-dev/pandas/issues/10710
1939+
# which doesn't reorder the list-like `q` on the inner level.
1940+
order = np.roll(list(range(result.index.nlevels)), -1)
1941+
result = result.reorder_levels(order)
1942+
result = result.reindex(q, level=-1)
1943+
1944+
# fix order.
1945+
hi = len(q) * self.ngroups
1946+
arr = np.arange(0, hi, self.ngroups)
1947+
arrays = []
1948+
1949+
for i in range(self.ngroups):
1950+
arr = arr + i
1951+
arrays.append(arr)
1952+
1953+
indices = np.concatenate(arrays)
1954+
assert len(indices) == len(result)
1955+
return result.take(indices)
19161956

19171957
@Substitution(name="groupby")
19181958
def ngroup(self, ascending=True):

pandas/tests/groupby/test_function.py

+51
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,57 @@ def test_quantile(interpolation, a_vals, b_vals, q):
12381238
tm.assert_frame_equal(result, expected)
12391239

12401240

1241+
def test_quantile_array():
1242+
# https://github.com/pandas-dev/pandas/issues/27526
1243+
df = pd.DataFrame({"A": [0, 1, 2, 3, 4]})
1244+
result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25])
1245+
1246+
index = pd.MultiIndex.from_product([[0, 1], [0.25]])
1247+
expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index)
1248+
tm.assert_frame_equal(result, expected)
1249+
1250+
df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]})
1251+
index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]])
1252+
1253+
result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75])
1254+
expected = pd.DataFrame(
1255+
{"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index
1256+
)
1257+
tm.assert_frame_equal(result, expected)
1258+
1259+
1260+
def test_quantile_array_no_sort():
1261+
df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]})
1262+
result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75])
1263+
expected = pd.DataFrame(
1264+
{"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]},
1265+
index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]),
1266+
)
1267+
tm.assert_frame_equal(result, expected)
1268+
1269+
result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25])
1270+
expected = pd.DataFrame(
1271+
{"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]},
1272+
index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]),
1273+
)
1274+
tm.assert_frame_equal(result, expected)
1275+
1276+
1277+
def test_quantile_array_multiple_levels():
1278+
df = pd.DataFrame(
1279+
{"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]}
1280+
)
1281+
result = df.groupby(["c", "d"]).quantile([0.25, 0.75])
1282+
index = pd.MultiIndex.from_tuples(
1283+
[("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)],
1284+
names=["c", "d", None],
1285+
)
1286+
expected = pd.DataFrame(
1287+
{"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index
1288+
)
1289+
tm.assert_frame_equal(result, expected)
1290+
1291+
12411292
def test_quantile_raises():
12421293
df = pd.DataFrame(
12431294
[["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"]

0 commit comments

Comments
 (0)