From 4700e7717db42212df91c15c69bef92edd13c2f4 Mon Sep 17 00:00:00 2001 From: Richard Date: Sun, 5 Jul 2020 16:56:39 -0400 Subject: [PATCH] BUG: transform with nunique should have dtype int64 GH35109 --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/dtypes/cast.py | 3 +++ pandas/tests/groupby/test_nunique.py | 7 +++++++ 3 files changed, 11 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9bd4ddbb624d9..0e929ff062cff 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1080,6 +1080,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` lost index, when one of the ``agg`` keys referenced an empty list (:issue:`32580`) - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`) - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.transform` where, when ``func='nunique'`` and the columns are of type ``datetime64``, the result was also of type ``datetime64`` instead of ``int64`` (:issue:`35109`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d0417d51da497..be073bc2aaead 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -319,6 +319,9 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: return np.dtype(np.int64) elif how in ["add", "cumsum", "sum"] and isinstance(dtype, BooleanDtype): return Int64Dtype() + elif how == "nunique": + return np.dtype(np.int64) + return dtype diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index 1475b1ce2907c..8bd71c0cf6bd7 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -167,3 +167,10 @@ def test_nunique_preserves_column_level_names(): result = test.groupby([0, 0, 0]).nunique() expected = pd.DataFrame([2], 
columns=test.columns) tm.assert_frame_equal(result, expected) + + +def test_nunique_transform_with_datetime(): + df = pd.DataFrame(date_range("2008-12-31", "2009-01-02"), columns=["date"]) + result = df.groupby([0, 0, 1])["date"].transform("nunique") + expected = pd.Series([2, 2, 1], name="date") + tm.assert_series_equal(result, expected)