From 977d16938494c3d9dc1733c3132378cc899f9625 Mon Sep 17 00:00:00 2001 From: Jens Diewald Date: Fri, 22 Dec 2023 16:41:04 +0100 Subject: [PATCH 1/4] Add test cases for level sequences These tests currently fail, as the level parameter does not yet accept sequences. --- tests/test_frame.py | 12 ++++++++++++ tests/test_series.py | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/tests/test_frame.py b/tests/test_frame.py index a25e9b683..f7df37dc0 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1073,6 +1073,18 @@ def test_types_groupby_iter() -> None: ) +def test_types_groupby_level() -> None: + # GH 836 + data = { + "col1": [0, 0, 0], + "col2": [0, 1, 0], + "col3": [1, 2, 3], + "col4": [1, 2, 3], + } + df = pd.DataFrame(data=data).set_index(["col1", "col2", "col3"]) + df.groupby(level=["col1", "col2"]).sum() + + def test_types_merge() -> None: df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]}) diff --git a/tests/test_series.py b/tests/test_series.py index 72ecd0cbe..1a48df179 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -439,6 +439,15 @@ def test_types_max() -> None: s.max(skipna=False) +def test_types_groupby_level() -> None: + # GH 836 + index = pd.MultiIndex.from_tuples( + [(0, 0, 1), (0, 1, 2), (0, 0, 3)], names=["col1", "col2", "col3"] + ) + s = pd.Series([1, 2, 3], index=index) + s.groupby(level=["col1", "col2"]).sum() + + def test_types_quantile() -> None: s = pd.Series([1, 2, 3, 10]) s.quantile([0.25, 0.5]) From ac36726d64910686de405fb1a56d34a394374a8f Mon Sep 17 00:00:00 2001 From: Jens Diewald Date: Fri, 22 Dec 2023 16:51:46 +0100 Subject: [PATCH 2/4] Allow sequences for groupby level parameter This fixes #836 --- pandas-stubs/core/frame.pyi | 16 ++++++++-------- pandas-stubs/core/series.pyi | 17 +++++++++-------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/pandas-stubs/core/frame.pyi 
b/pandas-stubs/core/frame.pyi index 6cd3236bb..f413e6110 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -1009,7 +1009,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: Scalar, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -1022,7 +1022,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: DatetimeIndex, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -1035,7 +1035,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: TimedeltaIndex, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -1048,7 +1048,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: PeriodIndex, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -1061,7 +1061,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: IntervalIndex[IntervalT], axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -1074,7 +1074,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: MultiIndex | GroupByObjectNonScalar | None = ..., axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -1087,7 +1087,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: Series[SeriesByT], axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -1100,7 +1100,7 @@ class DataFrame(NDFrame, OpsMixin): self, by: CategoricalIndex | Index | Series, axis: AxisIndex = ..., - level: Level | None = ..., + level: 
IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index aeee0fecd..2a42e8a3d 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -117,6 +117,7 @@ from pandas._typing import ( HashableT3, IgnoreRaise, IndexingInt, + IndexLabel, IntDtypeArg, InterpolateOptions, IntervalClosedType, @@ -547,7 +548,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: Scalar, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -560,7 +561,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: DatetimeIndex, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -573,7 +574,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: TimedeltaIndex, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -586,7 +587,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: PeriodIndex, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -599,7 +600,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: IntervalIndex[IntervalT], axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -612,7 +613,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: MultiIndex | GroupByObjectNonScalar = ..., axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -625,7 +626,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: Series[SeriesByT], axis: 
AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., @@ -638,7 +639,7 @@ class Series(IndexOpsMixin[S1], NDFrame): self, by: CategoricalIndex | Index | Series, axis: AxisIndex = ..., - level: Level | None = ..., + level: IndexLabel | None = ..., as_index: _bool = ..., sort: _bool = ..., group_keys: _bool = ..., From 876343c8d99e6827dccb498516921ae1aa29cabf Mon Sep 17 00:00:00 2001 From: Jens Diewald Date: Fri, 22 Dec 2023 21:14:12 +0100 Subject: [PATCH 3/4] Add assert_type --- tests/test_frame.py | 5 ++++- tests/test_series.py | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index f7df37dc0..54121c825 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1082,7 +1082,10 @@ def test_types_groupby_level() -> None: "col4": [1, 2, 3], } df = pd.DataFrame(data=data).set_index(["col1", "col2", "col3"]) - df.groupby(level=["col1", "col2"]).sum() + check( + assert_type(df.groupby(level=["col1", "col2"]).sum(), "pd.DataFrame"), + pd.DataFrame, + ) def test_types_merge() -> None: diff --git a/tests/test_series.py b/tests/test_series.py index 1a48df179..ba6ae57f7 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -445,7 +445,11 @@ def test_types_groupby_level() -> None: [(0, 0, 1), (0, 1, 2), (0, 0, 3)], names=["col1", "col2", "col3"] ) s = pd.Series([1, 2, 3], index=index) - s.groupby(level=["col1", "col2"]).sum() + check( + assert_type(s.groupby(level=["col1", "col2"]).sum(), "pd.Series[int]"), + pd.Series, + np.integer, + ) def test_types_quantile() -> None: From f577cacf60aef29aec999c30b4cb0bd7af1c8cc5 Mon Sep 17 00:00:00 2001 From: Jens Diewald Date: Sat, 23 Dec 2023 10:26:28 +0100 Subject: [PATCH 4/4] Remove unnecessary quotes --- tests/test_frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index 54121c825..bdd9bf491 100644 
--- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -1083,7 +1083,7 @@ def test_types_groupby_level() -> None: } df = pd.DataFrame(data=data).set_index(["col1", "col2", "col3"]) check( - assert_type(df.groupby(level=["col1", "col2"]).sum(), "pd.DataFrame"), + assert_type(df.groupby(level=["col1", "col2"]).sum(), pd.DataFrame), pd.DataFrame, )