From 5da8e96d2db066f3f2e70cac6f1c9632790bfd94 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Mon, 30 Nov 2020 13:21:21 +0800 Subject: [PATCH 1/8] fix-quantile --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/groupby/groupby.py | 16 +++++++++++++++- pandas/tests/groupby/test_quantile.py | 24 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 1f8fa1e2072fd..4f99fdfd7299d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -734,6 +734,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) - Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. 
"shift") (:issue:`36308`) +- Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle with arraylike ``q`` when grouping by columns (:issue:`35269`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7c97725f1264c..2cbecfc55a0be 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2227,6 +2227,7 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: ) for qi in q ] + if self.axis == 0: result = concat(results, axis=0, keys=q) # fix levels to place quantiles on the inside # TODO(GH-10710): Ideally, we could write this as @@ -2246,10 +2247,23 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: # restore the index names in order result.index.names = index_names[order] - # reorder rows to keep things sorted + indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() return result.take(indices) + else: + result = concat(results, axis=1, keys=q) + + order = list(range(1, result.columns.nlevels)) + [0] + index_names = np.array(result.columns.names) + result.columns.names = np.arange(len(index_names)) + result = result.reorder_levels(order, axis=1) + result.columns.names = index_names[order] + + indices = np.arange(result.shape[1]).reshape( + [len(q), self.ngroups], + ).T.flatten() + return result.take(indices, axis=1) @Substitution(name="groupby") def ngroup(self, ascending: bool = True): diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 53729a120ae8d..62184d39d75dd 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -254,3 +254,27 @@ def test_groupby_timedelta_quantile(): index=Index([1, 2], name="group"), ) tm.assert_frame_equal(result, expected) + + +def test_columns_groupby_quantile(): + # GH 33795 + df = DataFrame( + np.arange(12).reshape(3, -1), + index=list("XYZ"), + columns=Series(list("ABAB"), name="col"), + ) + 
result = df.groupby("col", axis=1).quantile(q=[0.8, 0.2]) + expected = DataFrame( + [ + [1.6, 0.4, 2.6, 1.4], + [5.6, 4.4, 6.6, 5.4], + [9.6, 8.4, 10.6, 9.4], + ], + index=list("XYZ"), + columns=Index( + [('A', 0.8), ('A', 0.2), ('B', 0.8), ('B', 0.2)], + names=['col', None] + ) + ) + + tm.assert_frame_equal(result, expected) From 7e735ac1c28415d38fd389da79b2e9c8b0ed0ca9 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Mon, 30 Nov 2020 13:33:32 +0800 Subject: [PATCH 2/8] update --- pandas/core/groupby/groupby.py | 11 +++++++---- pandas/tests/groupby/test_quantile.py | 7 +++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2cbecfc55a0be..d08cc7866621a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2259,10 +2259,13 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: result.columns.names = np.arange(len(index_names)) result = result.reorder_levels(order, axis=1) result.columns.names = index_names[order] - - indices = np.arange(result.shape[1]).reshape( - [len(q), self.ngroups], - ).T.flatten() + indices = ( + np.arange(result.shape[1]) + .reshape( + [len(q), self.ngroups], + ) + .T.flatten() + ) return result.take(indices, axis=1) @Substitution(name="groupby") diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 62184d39d75dd..5f50dc27c8751 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index +from pandas import DataFrame, Index, Series import pandas._testing as tm @@ -272,9 +272,8 @@ def test_columns_groupby_quantile(): ], index=list("XYZ"), columns=Index( - [('A', 0.8), ('A', 0.2), ('B', 0.8), ('B', 0.2)], - names=['col', None] - ) + [('A', 0.8), ('A', 0.2), ('B', 0.8), ('B', 0.2)], names=['col', None] + ), ) 
tm.assert_frame_equal(result, expected) From 95f0afed1abb81c696c18f57791823081c3ad343 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Mon, 30 Nov 2020 13:46:30 +0800 Subject: [PATCH 3/8] Update test_quantile.py --- pandas/tests/groupby/test_quantile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 5f50dc27c8751..76fc82c6288eb 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series +from pandas import DataFrame, Index import pandas._testing as tm @@ -261,7 +261,7 @@ def test_columns_groupby_quantile(): df = DataFrame( np.arange(12).reshape(3, -1), index=list("XYZ"), - columns=Series(list("ABAB"), name="col"), + columns=pd.Series(list("ABAB"), name="col"), ) result = df.groupby("col", axis=1).quantile(q=[0.8, 0.2]) expected = DataFrame( @@ -272,7 +272,7 @@ def test_columns_groupby_quantile(): ], index=list("XYZ"), columns=Index( - [('A', 0.8), ('A', 0.2), ('B', 0.8), ('B', 0.2)], names=['col', None] + [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None] ), ) From 8a41c030f49c8d35c9d4b5a632e13b120f44be96 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Mon, 30 Nov 2020 13:48:40 +0800 Subject: [PATCH 4/8] Update groupby.py --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d08cc7866621a..4cbaa8f07b93e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2247,8 +2247,8 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: # restore the index names in order result.index.names = index_names[order] - # reorder rows to keep things sorted + # reorder rows to keep things sorted indices = 
np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() return result.take(indices) else: From d26c36cd5c94f1359898d69b99c806a88360f978 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Wed, 2 Dec 2020 11:23:41 +0800 Subject: [PATCH 5/8] fix wrong issue number & use result.axes --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/groupby/groupby.py | 60 +++++++++++++--------------------- 2 files changed, 24 insertions(+), 38 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 4f99fdfd7299d..486a7f8cec8b6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -734,7 +734,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) - Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. 
"shift") (:issue:`36308`) -- Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle with arraylike ``q`` when grouping by columns (:issue:`35269`) +- Bug in :meth:`.DataFrameGroupBy.quantile` where the method could not handle array-like ``q`` when grouping by columns (:issue:`33795`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4cbaa8f07b93e..7755332de947d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2227,46 +2227,32 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: ) for qi in q ] - if self.axis == 0: - result = concat(results, axis=0, keys=q) - # fix levels to place quantiles on the inside - # TODO(GH-10710): Ideally, we could write this as - # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] - # but this hits https://github.com/pandas-dev/pandas/issues/10710 - # which doesn't reorder the list-like `q` on the inner level. - order = list(range(1, result.index.nlevels)) + [0] - - # temporarily saves the index names - index_names = np.array(result.index.names) - - # set index names to positions to avoid confusion - result.index.names = np.arange(len(index_names)) - - # place quantiles on the inside + result = concat(results, axis=self.axis, keys=q) + # fix levels to place quantiles on the inside + # TODO(GH-10710): Ideally, we could write this as + # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] + # but this hits https://github.com/pandas-dev/pandas/issues/10710 + # which doesn't reorder the list-like `q` on the inner level. 
+ order = list(range(1, result.axes[self.axis].nlevels)) + [0] + + # temporarily saves the index names + index_names = np.array(result.axes[self.axis].names) + + # set index names to positions to avoid confusion + result.axes[self.axis].names = np.arange(len(index_names)) + + # place quantiles on the inside + if isinstance(result, Series): result = result.reorder_levels(order) + else: + result = result.reorder_levels(order, axis=self.axis) - # restore the index names in order - result.index.names = index_names[order] + # restore the index names in order + result.axes[self.axis].names = index_names[order] - # reorder rows to keep things sorted - indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() - return result.take(indices) - else: - result = concat(results, axis=1, keys=q) - - order = list(range(1, result.columns.nlevels)) + [0] - index_names = np.array(result.columns.names) - result.columns.names = np.arange(len(index_names)) - result = result.reorder_levels(order, axis=1) - result.columns.names = index_names[order] - indices = ( - np.arange(result.shape[1]) - .reshape( - [len(q), self.ngroups], - ) - .T.flatten() - ) - return result.take(indices, axis=1) + # reorder rows to keep things sorted + indices = np.arange(result.shape[self.axis]).reshape([len(q), self.ngroups]).T.flatten() + return result.take(indices, axis=self.axis) @Substitution(name="groupby") def ngroup(self, ascending: bool = True): From c0e629af14e7f6a94925455c463079a2420af4bb Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Wed, 2 Dec 2020 11:28:45 +0800 Subject: [PATCH 6/8] Update groupby.py --- pandas/core/groupby/groupby.py | 48 +++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7755332de947d..f8a6d52848653 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2227,32 +2227,32 @@ def post_processor(vals: 
np.ndarray, inference: Optional[Type]) -> np.ndarray: ) for qi in q ] - result = concat(results, axis=self.axis, keys=q) - # fix levels to place quantiles on the inside - # TODO(GH-10710): Ideally, we could write this as - # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] - # but this hits https://github.com/pandas-dev/pandas/issues/10710 - # which doesn't reorder the list-like `q` on the inner level. - order = list(range(1, result.axes[self.axis].nlevels)) + [0] - - # temporarily saves the index names - index_names = np.array(result.axes[self.axis].names) - - # set index names to positions to avoid confusion - result.axes[self.axis].names = np.arange(len(index_names)) - - # place quantiles on the inside - if isinstance(result, Series): - result = result.reorder_levels(order) - else: - result = result.reorder_levels(order, axis=self.axis) + result = concat(results, axis=self.axis, keys=q) + # fix levels to place quantiles on the inside + # TODO(GH-10710): Ideally, we could write this as + # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] + # but this hits https://github.com/pandas-dev/pandas/issues/10710 + # which doesn't reorder the list-like `q` on the inner level. 
+ order = list(range(1, result.axes[self.axis].nlevels)) + [0] + + # temporarily saves the index names + index_names = np.array(result.axes[self.axis].names) + + # set index names to positions to avoid confusion + result.axes[self.axis].names = np.arange(len(index_names)) + + # place quantiles on the inside + if isinstance(result, Series): + result = result.reorder_levels(order) + else: + result = result.reorder_levels(order, axis=self.axis) - # restore the index names in order - result.axes[self.axis].names = index_names[order] + # restore the index names in order + result.axes[self.axis].names = index_names[order] - # reorder rows to keep things sorted - indices = np.arange(result.shape[self.axis]).reshape([len(q), self.ngroups]).T.flatten() - return result.take(indices, axis=self.axis) + # reorder rows to keep things sorted + indices = np.arange(result.shape[self.axis]).reshape([len(q), self.ngroups]).T.flatten() + return result.take(indices, axis=self.axis) @Substitution(name="groupby") def ngroup(self, ascending: bool = True): From e9a7c15c97bcdcc4ddd9b07d5fa7af0cd28b8a3a Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Wed, 2 Dec 2020 11:32:03 +0800 Subject: [PATCH 7/8] Update groupby.py --- pandas/core/groupby/groupby.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f8a6d52848653..2d400f48f7f3d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2251,7 +2251,9 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: result.axes[self.axis].names = index_names[order] # reorder rows to keep things sorted - indices = np.arange(result.shape[self.axis]).reshape([len(q), self.ngroups]).T.flatten() + indices = np.arange(result.shape[self.axis]).reshape( + [len(q), self.ngroups] + ).T.flatten() return result.take(indices, axis=self.axis) @Substitution(name="groupby") From 
55c9a19567c6c546e200d934ce3457e5a8b15a22 Mon Sep 17 00:00:00 2001 From: GYHHAHA <1801214626@qq.com> Date: Wed, 2 Dec 2020 11:46:34 +0800 Subject: [PATCH 8/8] update --- pandas/core/groupby/groupby.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2d400f48f7f3d..96a11fb190cb3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2251,9 +2251,11 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: result.axes[self.axis].names = index_names[order] # reorder rows to keep things sorted - indices = np.arange(result.shape[self.axis]).reshape( - [len(q), self.ngroups] - ).T.flatten() + indices = ( + np.arange(result.shape[self.axis]) + .reshape([len(q), self.ngroups]) + .T.flatten() + ) return result.take(indices, axis=self.axis) @Substitution(name="groupby")