Skip to content

Fix groupby-resample KeyError when resampling on Index and giving explicit list of columns. #50876

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 23, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,7 @@ Groupby/resample/rolling
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
- Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`)
- Bug in :meth:`.DataFrameGroupBy.resample` would raise ``KeyError`` when selecting a list of columns while resampling on a time index (:issue:`50840`)
-

Reshaping
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,7 +1202,7 @@ def _gotitem(self, key, ndim, subset=None):

# Try to select from a DataFrame, falling back to a Series
try:
if isinstance(key, list) and self.key not in key:
if isinstance(key, list) and self.key not in key and self.key is not None:
key.append(self.key)
groupby = self._groupby[key]
except IndexError:
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import unittest
from textwrap import dedent

import numpy as np
Expand Down Expand Up @@ -536,3 +537,65 @@ def test_groupby_resample_size_all_index_same():
),
)
tm.assert_series_equal(result, expected)


class TestGroupByResampleTimeIndex(unittest.TestCase):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you keep the tests as python functions? We don't use unittest.

"""Test groupby resample with a time index where a list of columns is given."""
def setUp(self) -> None:
    """Create the frame shared by the tests and store it on ``self.df``.

    The frame has a ``group`` column with two groups of four rows each,
    three value columns, and an index named ``date`` built from
    ``date_range(start="2016-01-01", periods=8)``.
    """
    # Wrapping the dates in a named Series gives the index its "date" name.
    date_index = Series(
        date_range(start="2016-01-01", periods=8),
        name="date",
    )
    columns = {
        "group": [0, 0, 0, 0, 1, 1, 1, 1],
        "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
        "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
        "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
    }
    self.df = DataFrame(data=columns, index=date_index)

def test_list_of_one_key(self):
    """Check the 2-day mean when a one-element column list is selected."""
    # GH 50840
    resampler = self.df.groupby("group").resample("2D")
    result = resampler[["first_val"]].mean()

    # One (group, bin start) pair per 2-day bin.
    expected_index = Index(
        data=[
            (0, Timestamp("2016-01-01")),
            (0, Timestamp("2016-01-03")),
            (1, Timestamp("2016-01-05")),
            (1, Timestamp("2016-01-07")),
        ],
        name=("group", "date"),
    )
    expected = DataFrame(
        data={"first_val": [2.0, 2.5, 7.0, 4.0]},
        index=expected_index,
    )
    tm.assert_frame_equal(result, expected)

def test_list_of_multiple_keys(self):
    """Check the 2-day mean when a two-element column list is selected."""
    # GH 50876
    selected = ["first_val", "second_val"]
    result = self.df.groupby("group").resample("2D")[selected].mean()

    # One (group, bin start) pair per 2-day bin.
    expected_index = Index(
        data=[
            (0, Timestamp("2016-01-01")),
            (0, Timestamp("2016-01-03")),
            (1, Timestamp("2016-01-05")),
            (1, Timestamp("2016-01-07")),
        ],
        name=("group", "date"),
    )
    expected_values = {
        "first_val": [2.0, 2.5, 7.0, 4.0],
        "second_val": [4.5, 4.5, 5.0, 4.5],
    }
    expected = DataFrame(data=expected_values, index=expected_index)
    tm.assert_frame_equal(result, expected)

def test_missing_key_raises_KeyError(self):
    """Selecting a column that is not in the frame must raise ``KeyError``."""
    # GH 50876
    absent_columns = ["val_not_in_dataframe"]
    with pytest.raises(KeyError, match="Columns not found"):
        self.df.groupby("group").resample("2D")[absent_columns].mean()