Skip to content

Commit aa309a7

Browse files
GYHHAHA authored and yehoshuadimarsky committed
BUG: df.groupby().resample()[[cols]] without key columns raise KeyError (pandas-dev#47605)
* Update resample.py
* Update v1.5.0.rst
* Update test_resampler_grouper.py
* delete blank
* Update test_resampler_grouper.py
* Update v1.5.0.rst
* Update resample.py
1 parent 9b194a1 commit aa309a7

File tree

3 files changed

+36
-4
lines changed

3 files changed

+36
-4
lines changed

doc/source/whatsnew/v1.5.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,8 @@ Groupby/resample/rolling
10001000
- Bug in :meth:`.DataFrameGroupBy.describe` and :meth:`.SeriesGroupBy.describe` produces inconsistent results for empty datasets (:issue:`41575`)
10011001
- Bug in :meth:`DataFrame.resample` reduction methods when used with ``on`` would attempt to aggregate the provided column (:issue:`47079`)
10021002
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would not respect ``dropna=False`` when the input DataFrame/Series had NaN values in a :class:`MultiIndex` (:issue:`46783`)
1003+
- Bug in :meth:`DataFrameGroupBy.resample` raising ``KeyError`` when selecting a list of columns that does not include the resample key (the column passed as ``on``) (:issue:`47362`)
1004+
-
10031005

10041006
Reshaping
10051007
^^^^^^^^^

pandas/core/resample.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -502,11 +502,11 @@ def _apply_loffset(self, result):
502502
self.loffset = None
503503
return result
504504

505-
def _get_resampler_for_grouping(self, groupby):
505+
def _get_resampler_for_grouping(self, groupby, key=None):
506506
"""
507507
Return the correct class for resampling with groupby.
508508
"""
509-
return self._resampler_for_grouping(self, groupby=groupby)
509+
return self._resampler_for_grouping(self, groupby=groupby, key=key)
510510

511511
def _wrap_result(self, result):
512512
"""
@@ -1132,7 +1132,7 @@ class _GroupByMixin(PandasObject):
11321132
_attributes: list[str] # in practice the same as Resampler._attributes
11331133
_selection: IndexLabel | None = None
11341134

1135-
def __init__(self, obj, parent=None, groupby=None, **kwargs) -> None:
1135+
def __init__(self, obj, parent=None, groupby=None, key=None, **kwargs) -> None:
11361136
# reached via ._gotitem and _get_resampler_for_grouping
11371137

11381138
if parent is None:
@@ -1145,6 +1145,7 @@ def __init__(self, obj, parent=None, groupby=None, **kwargs) -> None:
11451145
self._selection = kwargs.get("selection")
11461146

11471147
self.binner = parent.binner
1148+
self.key = key
11481149

11491150
self._groupby = groupby
11501151
self._groupby.mutated = True
@@ -1197,6 +1198,8 @@ def _gotitem(self, key, ndim, subset=None):
11971198

11981199
# Try to select from a DataFrame, falling back to a Series
11991200
try:
1201+
if isinstance(key, list) and self.key not in key:
1202+
key.append(self.key)
12001203
groupby = self._groupby[key]
12011204
except IndexError:
12021205
groupby = self._groupby
@@ -1513,7 +1516,7 @@ def get_resampler_for_grouping(
15131516
# .resample uses 'on' similar to how .groupby uses 'key'
15141517
tg = TimeGrouper(freq=rule, key=on, **kwargs)
15151518
resampler = tg._get_resampler(groupby.obj, kind=kind)
1516-
return resampler._get_resampler_for_grouping(groupby=groupby)
1519+
return resampler._get_resampler_for_grouping(groupby=groupby, key=tg.key)
15171520

15181521

15191522
class TimeGrouper(Grouper):

pandas/tests/resample/test_resampler_grouper.py

+27
Original file line numberDiff line numberDiff line change
@@ -470,3 +470,30 @@ def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
470470
index=idx,
471471
)
472472
tm.assert_frame_equal(result, expected)
473+
474+
475+
def test_groupby_resample_with_list_of_keys():
476+
# GH 47362
477+
df = DataFrame(
478+
data={
479+
"date": date_range(start="2016-01-01", periods=8),
480+
"group": [0, 0, 0, 0, 1, 1, 1, 1],
481+
"val": [1, 7, 5, 2, 3, 10, 5, 1],
482+
}
483+
)
484+
result = df.groupby("group").resample("2D", on="date")[["val"]].mean()
485+
expected = DataFrame(
486+
data={
487+
"val": [4.0, 3.5, 6.5, 3.0],
488+
},
489+
index=Index(
490+
data=[
491+
(0, Timestamp("2016-01-01")),
492+
(0, Timestamp("2016-01-03")),
493+
(1, Timestamp("2016-01-05")),
494+
(1, Timestamp("2016-01-07")),
495+
],
496+
name=("group", "date"),
497+
),
498+
)
499+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)