diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1cd65bb530f73..e4a3bd796e3ba 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -543,6 +543,7 @@ Groupby/Resample/Rolling - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) - Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) - Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) +- Bug in ``groupby.nunique()`` with ``TimeGrouper`` where ``NaT`` values were not handled correctly (:issue:`17575`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f14ed08a27fae..a62ae40a85941 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3177,7 +3177,13 @@ def nunique(self, dropna=True): out = np.add.reduceat(inc, idx).astype('int64', copy=False) if len(ids): - res = out if ids[0] != -1 else out[1:] + # NaN/NaT group exists if the head of ids is -1, + # so remove it from res and exclude its index from idx + if ids[0] == -1: + res = out[1:] + idx = idx[np.flatnonzero(idx)] + else: + res = out else: res = out[1:] ri = self.grouper.result_index diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index df0a93d783375..f83a3fcd0668d 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -608,3 +608,16 @@ def test_first_last_max_min_on_time_data(self): assert_frame_equal(grouped_ref.min(), grouped_test.min()) assert_frame_equal(grouped_ref.first(), grouped_test.first()) assert_frame_equal(grouped_ref.last(), grouped_test.last()) + + def test_nunique_with_timegrouper_and_nat(self): + # GH 17575 + test = 
pd.DataFrame({ + 'time': [Timestamp('2016-06-28 09:35:35'), + pd.NaT, + Timestamp('2016-06-28 16:46:28')], + 'data': ['1', '2', '3']}) + + grouper = pd.TimeGrouper(key='time', freq='h') + result = test.groupby(grouper)['data'].nunique() + expected = test[test.time.notnull()].groupby(grouper)['data'].nunique() + tm.assert_series_equal(result, expected)