diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 8e72ce83ac028..9405e7804a461 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -254,6 +254,7 @@ Groupby/Resample/Rolling - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) - Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) +- Bug in :func:`idxmax` and :func:`idxmin` on :meth:`DataFrame.groupby` where datetime columns would return an incorrect dtype (:issue:`25444`, :issue:`15306`) Reshaping diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 903c898b68873..a5804586bdf11 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -89,7 +89,8 @@ def _gotitem(self, key, ndim, subset=None): cython_transforms = frozenset(['cumprod', 'cumsum', 'shift', 'cummin', 'cummax']) -cython_cast_blacklist = frozenset(['rank', 'count', 'size']) +cython_cast_blacklist = frozenset(['rank', 'count', 'size', 'idxmin', + 'idxmax']) def whitelist_method_generator(base, klass, whitelist): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index bdae6f36b5572..5823c39cbd8ed 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -254,8 +254,13 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): data = obj[item] colg = SeriesGroupBy(data, selection=item, grouper=self.grouper) - result[item] = self._try_cast( - colg.aggregate(func, *args, **kwargs), data) + + cast = self._transform_should_cast(func) + + result[item] = colg.aggregate(func, *args, **kwargs) + if cast: + result[item] = self._try_cast(result[item], data) + except ValueError: cannot_agg.append(item) 
continue diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index b5e328ef64424..4ea0d12656ee4 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -400,6 +400,25 @@ def test_groupby_non_arithmetic_agg_int_like_precision(i): assert res.iloc[0].b == data["expected"] +@pytest.mark.parametrize("func, values", [ + ("idxmin", {'c_int': [0, 2], 'c_float': [1, 3], 'c_date': [1, 2]}), + ("idxmax", {'c_int': [1, 3], 'c_float': [0, 2], 'c_date': [0, 3]}) +]) +def test_idxmin_idxmax_returns_int_types(func, values): + # GH 25444 + df = pd.DataFrame({'name': ['A', 'A', 'B', 'B'], + 'c_int': [1, 2, 3, 4], + 'c_float': [4.02, 3.03, 2.04, 1.05], + 'c_date': ['2019', '2018', '2016', '2017']}) + df['c_date'] = pd.to_datetime(df['c_date']) + + result = getattr(df.groupby('name'), func)() + + expected = pd.DataFrame(values, index=Index(['A', 'B'], name="name")) + + tm.assert_frame_equal(result, expected) + + def test_fill_consistency(): # GH9221 diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index b645073fcf72a..26f39f8f41e2f 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -845,3 +845,22 @@ def test_groupby_transform_timezone_column(func): expected = pd.DataFrame([[ts, 1, ts]], columns=['end_time', 'id', 'max_end_time']) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func, values", [ + ("idxmin", ["1/1/2011"] * 2 + ["1/3/2011"] * 7 + ["1/10/2011"]), + ("idxmax", ["1/2/2011"] * 2 + ["1/9/2011"] * 7 + ["1/10/2011"]) +]) +def test_groupby_transform_with_datetimes(func, values): + # GH 15306 + dates = pd.date_range('1/1/2011', periods=10, freq='D') + + stocks = pd.DataFrame({'price': np.arange(10.0)}, index=dates) + stocks['week_id'] = pd.to_datetime(stocks.index).week + + result = stocks.groupby(stocks['week_id'])['price'].transform(func) + + expected = 
pd.Series(data=pd.to_datetime(values), + index=dates, name="price") + + tm.assert_series_equal(result, expected)