Skip to content

Commit f15e1e0

Browse files
Backport PR #37905: BUG: groupby resample different results with .agg() vs .mean() (#38633)
Co-authored-by: Jose <[email protected]>
1 parent 6bdfe67 commit f15e1e0

File tree

3 files changed

+51
-4
lines changed

3 files changed

+51
-4
lines changed

doc/source/whatsnew/v1.2.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -793,8 +793,9 @@ Groupby/resample/rolling
793793
- Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.Resampler.quantile` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`)
794794
- Bug in :meth:`.Rolling.median` and :meth:`.Rolling.quantile` returned wrong values for :class:`.BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`)
795795
- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
796-
- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
796+
- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
797797
- Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`)
798+
- Bug in :meth:`.DataFrameGroupBy.resample` using ``.agg`` with sum produced a different result than just calling ``.sum`` (:issue:`33548`)
798799
- Bug in :meth:`.DataFrameGroupBy.apply` dropped values on ``nan`` group when returning the same axes with the original frame (:issue:`38227`)
799800
- Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle arraylike ``q`` when grouping by columns (:issue:`33795`)
800801
- Bug in :meth:`.DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`)

pandas/core/groupby/grouper.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ def __init__(
287287
self.indexer = None
288288
self.binner = None
289289
self._grouper = None
290+
self._indexer = None
290291
self.dropna = dropna
291292

292293
@final
@@ -341,15 +342,24 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
341342
# Keep self.grouper value before overriding
342343
if self._grouper is None:
343344
self._grouper = self.grouper
345+
self._indexer = self.indexer
344346

345347
# the key must be a valid info item
346348
if self.key is not None:
347349
key = self.key
348350
# The 'on' is already defined
349351
if getattr(self.grouper, "name", None) == key and isinstance(obj, Series):
350-
# pandas\core\groupby\grouper.py:348: error: Item "None" of
351-
# "Optional[Any]" has no attribute "take" [union-attr]
352-
ax = self._grouper.take(obj.index) # type: ignore[union-attr]
352+
# Sometimes self._grouper will have been resorted while
353+
# obj has not. In this case there is a mismatch when we
354+
# call self._grouper.take(obj.index) so we need to undo the sorting
355+
# before we call _grouper.take.
356+
assert self._grouper is not None
357+
if self._indexer is not None:
358+
reverse_indexer = self._indexer.argsort()
359+
unsorted_ax = self._grouper.take(reverse_indexer)
360+
ax = unsorted_ax.take(obj.index)
361+
else:
362+
ax = self._grouper.take(obj.index)
353363
else:
354364
if key not in obj._info_axis:
355365
raise KeyError(f"The grouper name {key} is not found")

pandas/tests/resample/test_resampler_grouper.py

+36
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,39 @@ def test_apply_to_one_column_of_df():
362362
tm.assert_series_equal(result, expected)
363363
result = df.resample("H").apply(lambda group: group["col"].sum())
364364
tm.assert_series_equal(result, expected)
365+
366+
367+
def test_resample_groupby_agg():
368+
# GH: 33548
369+
df = DataFrame(
370+
{
371+
"cat": [
372+
"cat_1",
373+
"cat_1",
374+
"cat_2",
375+
"cat_1",
376+
"cat_2",
377+
"cat_1",
378+
"cat_2",
379+
"cat_1",
380+
],
381+
"num": [5, 20, 22, 3, 4, 30, 10, 50],
382+
"date": [
383+
"2019-2-1",
384+
"2018-02-03",
385+
"2020-3-11",
386+
"2019-2-2",
387+
"2019-2-2",
388+
"2018-12-4",
389+
"2020-3-11",
390+
"2020-12-12",
391+
],
392+
}
393+
)
394+
df["date"] = pd.to_datetime(df["date"])
395+
396+
resampled = df.groupby("cat").resample("Y", on="date")
397+
expected = resampled.sum()
398+
result = resampled.agg({"num": "sum"})
399+
400+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)