Skip to content

Commit 38ad5ce

Browse files
authored
Fix groupby-resample KeyError when resampling on Index and giving explicit list of columns. (#50876)
* Add failing test reproducing groupby-resample KeyError (#50840)
* Fix groupby-resample KeyError (#50840) by adding None check.
* Update whatsnew for #50840
* Improve coverage with multi and missing column groupby-resample tests.
* Refactor groupby-resample tests via TestCase to remove duplicate code.
* Revert "Refactor groupby-resample tests via TestCase to remove duplicate code." This reverts commit d522606.
* Fix typo in bug fix entry for #50840 in doc/source/whatsnew/v2.0.0.rst
1 parent e7f6a84 commit 38ad5ce

File tree

3 files changed

+81
-1
lines changed

3 files changed

+81
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,7 @@ Groupby/resample/rolling
11001100
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
11011101
- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
11021102
- Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`)
1103+
- Bug in :meth:`.DataFrameGroupBy.resample` raising ``KeyError`` when selecting a list of columns from the result while resampling on a time index (:issue:`50840`)
11031104
-
11041105

11051106
Reshaping

pandas/core/resample.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1202,7 +1202,7 @@ def _gotitem(self, key, ndim, subset=None):
12021202

12031203
# Try to select from a DataFrame, falling back to a Series
12041204
try:
1205-
if isinstance(key, list) and self.key not in key:
1205+
if isinstance(key, list) and self.key not in key and self.key is not None:
12061206
key.append(self.key)
12071207
groupby = self._groupby[key]
12081208
except IndexError:

pandas/tests/resample/test_resampler_grouper.py

+79
Original file line numberDiff line numberDiff line change
@@ -536,3 +536,82 @@ def test_groupby_resample_size_all_index_same():
536536
),
537537
)
538538
tm.assert_series_equal(result, expected)
539+
540+
541+
def test_groupby_resample_on_index_with_list_of_keys():
    """Select a one-column list from a groupby-resample done on the index.

    Regression test for GH 50840: resampling on the (named) datetime index
    and then indexing the resampler with a list of columns used to raise
    ``KeyError``.
    """
    # GH 50840
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=Series(
            date_range(start="2016-01-01", periods=8),
            name="date",
        ),
    )
    result = df.groupby("group").resample("2D")[["val"]].mean()

    # Each group holds four consecutive days, so the 2-day bins start on
    # 2016-01-01, -03 (group 0) and 2016-01-05, -07 (group 1), i.e. every
    # other entry of the input index.  Taking the dates from ``df.index``
    # keeps both sides of the comparison on the same datetime resolution.
    bin_starts = df.index[::2]
    expected = DataFrame(
        data={
            "val": [2.0, 2.5, 7.0, 4.0],
        },
        index=Index(
            data=list(zip([0, 0, 1, 1], bin_starts)),
            name=("group", "date"),
        ),
    )
    tm.assert_frame_equal(result, expected)
569+
570+
571+
def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
    """Select several columns from a groupby-resample done on the index.

    Only the requested columns (``first_val`` and ``second_val``) should be
    present in the result; ``third_val`` and the grouping column are not.
    """
    # GH 50840 (test added in GH 50876)
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
            "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
            "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
        },
        index=Series(
            date_range(start="2016-01-01", periods=8),
            name="date",
        ),
    )
    result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean()

    # Each group holds four consecutive days, so the 2-day bins start on
    # 2016-01-01, -03 (group 0) and 2016-01-05, -07 (group 1), i.e. every
    # other entry of the input index.  Taking the dates from ``df.index``
    # keeps both sides of the comparison on the same datetime resolution.
    bin_starts = df.index[::2]
    expected = DataFrame(
        data={
            "first_val": [2.0, 2.5, 7.0, 4.0],
            "second_val": [4.5, 4.5, 5.0, 4.5],
        },
        index=Index(
            data=list(zip([0, 0, 1, 1], bin_starts)),
            name=("group", "date"),
        ),
    )
    tm.assert_frame_equal(result, expected)
602+
603+
604+
def test_groupby_resample_on_index_with_list_of_keys_missing_column():
    """Requesting a non-existent column from a groupby-resample raises.

    Selecting a list containing a column that is not in the frame must
    raise ``KeyError`` with a "Columns not found" message.
    """
    # GH 50840 (test added in GH 50876)
    df = DataFrame(
        data={
            "group": [0, 0, 0, 0, 1, 1, 1, 1],
            "val": [3, 1, 4, 1, 5, 9, 2, 6],
        },
        index=Series(
            date_range(start="2016-01-01", periods=8),
            name="date",
        ),
    )
    with pytest.raises(KeyError, match="Columns not found"):
        df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean()

0 commit comments

Comments
 (0)