Skip to content

Commit f797c1d

Browse files
Licht-T and jreback
authored and committed
BUG: Fix groupby nunique with NaT (pandas-dev#17624)
1 parent e6d8953 commit f797c1d

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,7 @@ Groupby/Resample/Rolling
552552
- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`)
553553
- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1`` (:issue:`15305`)
554554
- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`)
555+
- Bug in ``groupby.nunique()`` with ``TimeGrouper`` where ``NaT`` values were not handled correctly (:issue:`17575`)
555556

556557
Sparse
557558
^^^^^^

pandas/core/groupby.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -3177,7 +3177,13 @@ def nunique(self, dropna=True):
31773177

31783178
out = np.add.reduceat(inc, idx).astype('int64', copy=False)
31793179
if len(ids):
3180-
res = out if ids[0] != -1 else out[1:]
3180+
# NaN/NaT group exists if the head of ids is -1,
3181+
# so remove it from res and exclude its index from idx
3182+
if ids[0] == -1:
3183+
res = out[1:]
3184+
idx = idx[np.flatnonzero(idx)]
3185+
else:
3186+
res = out
31813187
else:
31823188
res = out[1:]
31833189
ri = self.grouper.result_index

pandas/tests/groupby/test_timegrouper.py

+13
Original file line numberDiff line numberDiff line change
@@ -608,3 +608,16 @@ def test_first_last_max_min_on_time_data(self):
608608
assert_frame_equal(grouped_ref.min(), grouped_test.min())
609609
assert_frame_equal(grouped_ref.first(), grouped_test.first())
610610
assert_frame_equal(grouped_ref.last(), grouped_test.last())
611+
612+
def test_nunique_with_timegrouper_and_nat(self):
613+
# GH 17575
614+
test = pd.DataFrame({
615+
'time': [Timestamp('2016-06-28 09:35:35'),
616+
pd.NaT,
617+
Timestamp('2016-06-28 16:46:28')],
618+
'data': ['1', '2', '3']})
619+
620+
grouper = pd.TimeGrouper(key='time', freq='h')
621+
result = test.groupby(grouper)['data'].nunique()
622+
expected = test[test.time.notnull()].groupby(grouper)['data'].nunique()
623+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)