Skip to content

Commit 458c320

Browse files
jalmaguerluckyvs1
authored and committed
BUG: groupby resample different results with .agg() vs .mean() (pandas-dev#37905)
1 parent 8e502ea commit 458c320

File tree

3 files changed

+51
-4
lines changed

3 files changed

+51
-4
lines changed

doc/source/whatsnew/v1.2.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -793,8 +793,9 @@ Groupby/resample/rolling
793793
- Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.Resampler.quantile` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`)
794794
- Bug in :meth:`.Rolling.median` and :meth:`.Rolling.quantile` returned wrong values for :class:`.BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`)
795795
- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
796-
- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
796+
- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
797797
- Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`)
798+
- Bug in :meth:`.DataFrameGroupBy.resample` using ``.agg`` with sum produced a different result than just calling ``.sum`` (:issue:`33548`)
798799
- Bug in :meth:`.DataFrameGroupBy.apply` dropped values on ``nan`` group when returning the same axes with the original frame (:issue:`38227`)
799800
- Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle arraylike ``q`` when grouping by columns (:issue:`33795`)
800801
- Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`)

pandas/core/groupby/grouper.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ def __init__(
258258
self.indexer = None
259259
self.binner = None
260260
self._grouper = None
261+
self._indexer = None
261262
self.dropna = dropna
262263

263264
@final
@@ -312,15 +313,24 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
312313
# Keep self.grouper value before overriding
313314
if self._grouper is None:
314315
self._grouper = self.grouper
316+
self._indexer = self.indexer
315317

316318
# the key must be a valid info item
317319
if self.key is not None:
318320
key = self.key
319321
# The 'on' is already defined
320322
if getattr(self.grouper, "name", None) == key and isinstance(obj, Series):
321-
# pandas\core\groupby\grouper.py:348: error: Item "None" of
322-
# "Optional[Any]" has no attribute "take" [union-attr]
323-
ax = self._grouper.take(obj.index) # type: ignore[union-attr]
323+
# Sometimes self._grouper will have been resorted while
324+
# obj has not. In this case there is a mismatch when we
325+
# call self._grouper.take(obj.index) so we need to undo the sorting
326+
# before we call _grouper.take.
327+
assert self._grouper is not None
328+
if self._indexer is not None:
329+
reverse_indexer = self._indexer.argsort()
330+
unsorted_ax = self._grouper.take(reverse_indexer)
331+
ax = unsorted_ax.take(obj.index)
332+
else:
333+
ax = self._grouper.take(obj.index)
324334
else:
325335
if key not in obj._info_axis:
326336
raise KeyError(f"The grouper name {key} is not found")

pandas/tests/resample/test_resampler_grouper.py

+36
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,39 @@ def test_apply_to_one_column_of_df():
362362
tm.assert_series_equal(result, expected)
363363
result = df.resample("H").apply(lambda group: group["col"].sum())
364364
tm.assert_series_equal(result, expected)
365+
366+
367+
def test_resample_groupby_agg():
368+
# GH: 33548
369+
df = DataFrame(
370+
{
371+
"cat": [
372+
"cat_1",
373+
"cat_1",
374+
"cat_2",
375+
"cat_1",
376+
"cat_2",
377+
"cat_1",
378+
"cat_2",
379+
"cat_1",
380+
],
381+
"num": [5, 20, 22, 3, 4, 30, 10, 50],
382+
"date": [
383+
"2019-2-1",
384+
"2018-02-03",
385+
"2020-3-11",
386+
"2019-2-2",
387+
"2019-2-2",
388+
"2018-12-4",
389+
"2020-3-11",
390+
"2020-12-12",
391+
],
392+
}
393+
)
394+
df["date"] = pd.to_datetime(df["date"])
395+
396+
resampled = df.groupby("cat").resample("Y", on="date")
397+
expected = resampled.sum()
398+
result = resampled.agg({"num": "sum"})
399+
400+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)