Skip to content

Commit 5b58a20

Browse files
discort authored and jreback committed
BUG: groupby with resample using on parameter errors when selecting column to apply function
closes pandas-dev#17813
Author: discort <[email protected]>
Closes pandas-dev#19433 from discort/fix_17813 and squashes the following commits:
2f25d40 [discort] Fixed bug in df.resample using 'on' parameter
1 parent 98f3937 commit 5b58a20

File tree

3 files changed

+30
-5
lines changed

3 files changed

+30
-5
lines changed

doc/source/whatsnew/v0.23.0.txt

+6-2
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,17 @@ Convert to an xarray DataArray
289289
p.to_xarray()
290290

291291

292+
.. _whatsnew_0230.api_breaking.build_changes:
293+
292294
Build Changes
293295
^^^^^^^^^^^^^
294296

295297
- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`)
296298
- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`)
297299
- Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`)
298300

301+
.. _whatsnew_0230.api_breaking.extract:
302+
299303
Extraction of matching patterns from strings
300304
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
301305

@@ -594,8 +598,8 @@ Groupby/Resample/Rolling
594598
- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`)
595599
- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`)
596600
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
597-
- Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
598-
-
601+
- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
602+
- Bug in :func:`DataFrame.groupby` followed by ``.resample()`` passing the ``on=`` kwarg, and subsequently selecting a column or using ``.apply()`` (:issue:`17813`)
599603

600604
Sparse
601605
^^^^^^

pandas/core/groupby.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
_ensure_categorical,
3838
_ensure_float)
3939
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
40+
from pandas.core.dtypes.generic import ABCSeries
4041
from pandas.core.dtypes.missing import isna, notna, _maybe_fill
4142

4243
from pandas.core.base import (PandasObject, SelectionMixin, GroupByError,
@@ -423,6 +424,7 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
423424
self.obj = None
424425
self.indexer = None
425426
self.binner = None
427+
self._grouper = None
426428

427429
@property
428430
def ax(self):
@@ -465,12 +467,22 @@ def _set_grouper(self, obj, sort=False):
465467
raise ValueError(
466468
"The Grouper cannot specify both a key and a level!")
467469

470+
# Keep self.grouper value before overriding
471+
if self._grouper is None:
472+
self._grouper = self.grouper
473+
468474
# the key must be a valid info item
469475
if self.key is not None:
470476
key = self.key
471-
if key not in obj._info_axis:
472-
raise KeyError("The grouper name {0} is not found".format(key))
473-
ax = Index(obj[key], name=key)
477+
# The 'on' is already defined
478+
if getattr(self.grouper, 'name', None) == key and \
479+
isinstance(obj, ABCSeries):
480+
ax = self._grouper.take(obj.index)
481+
else:
482+
if key not in obj._info_axis:
483+
raise KeyError(
484+
"The grouper name {0} is not found".format(key))
485+
ax = Index(obj[key], name=key)
474486

475487
else:
476488
ax = obj._get_axis(self.axis)

pandas/tests/test_resample.py

+9
Original file line numberDiff line numberDiff line change
@@ -3077,6 +3077,15 @@ def test_getitem_multiple(self):
30773077
result = r['buyer'].count()
30783078
assert_series_equal(result, expected)
30793079

3080+
def test_groupby_resample_on_api_with_getitem(self):
3081+
# GH 17813
3082+
df = pd.DataFrame({'id': list('aabbb'),
3083+
'date': pd.date_range('1-1-2016', periods=5),
3084+
'data': 1})
3085+
exp = df.set_index('date').groupby('id').resample('2D')['data'].sum()
3086+
result = df.groupby('id').resample('2D', on='date')['data'].sum()
3087+
assert_series_equal(result, exp)
3088+
30803089
def test_nearest(self):
30813090

30823091
# GH 17496

0 commit comments

Comments
 (0)