diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 4708abe4d592e..868ae003b8cd5 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -585,3 +585,5 @@ Bug Fixes
 - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
 - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
 - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
+
+- Bug in ``groupby().nunique()`` with a ``TimeGrouper`` which returned incorrect values when there was a gap in the dates (:issue:`13453`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 23c835318b0e6..ba2de295fa0a9 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -3032,7 +3032,7 @@ def nunique(self, dropna=True):
         # we might have duplications among the bins
         if len(res) != len(ri):
             res, out = np.zeros(len(ri), dtype=out.dtype), res
-            res[ids] = out
+            res[ids[idx]] = out
 
         return Series(res,
                       index=ri,
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d53446870beb1..59cbcab23b9e7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -4159,6 +4159,19 @@ def test_nunique_with_empty_series(self):
         expected = pd.Series(name='name', dtype='int64')
         tm.assert_series_equal(result, expected)
 
+    def test_nunique_with_timegrouper(self):
+        # GH 13453: the hourly bins between 10:00 and 15:00 are empty
+        test = pd.DataFrame({
+            'time': [Timestamp('2016-06-28 09:35:35'),
+                     Timestamp('2016-06-28 16:09:30'),
+                     Timestamp('2016-06-28 16:46:28')],
+            'data': ['1', '2', '3']}).set_index('time')
+        result = test.groupby(pd.TimeGrouper(freq='h'))['data'].nunique()
+        expected = test.groupby(
+            pd.TimeGrouper(freq='h')
+        )['data'].apply(pd.Series.nunique)
+        tm.assert_series_equal(result, expected)
+
     def test_numpy_compat(self):
         # see gh-12811
         df = pd.DataFrame({'A': [1, 2, 1], 'B': [1, 2, 3]})
diff --git a/pandas/tests/tseries/test_resample.py b/pandas/tests/tseries/test_resample.py
index afb44887fe7d1..95ce1b980a548 100755
--- a/pandas/tests/tseries/test_resample.py
+++ b/pandas/tests/tseries/test_resample.py
@@ -1939,6 +1939,27 @@ def test_resample_nunique(self):
         result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
         assert_series_equal(result, expected)
 
+    def test_resample_nunique_with_date_gap(self):
+        # GH 13453: no data between 2000-02-15 and 2000-04-15 (empty bin)
+        index = pd.date_range('1-1-2000', '2-15-2000', freq='h')
+        index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h')
+        index3 = index.append(index2)
+        s = pd.Series(range(len(index3)), index=index3)
+        r = s.resample('M')
+
+        # Since all elements are unique, these should all be the same
+        results = [
+            r.count(),
+            r.nunique(),
+            r.agg(pd.Series.nunique),
+            r.agg('nunique')
+        ]
+
+        for res1 in results:
+            for res2 in results:
+                if res1 is not res2:
+                    assert_series_equal(res1, res2)
+
     def test_resample_group_info(self):  # GH10914
         for n, k in product((10000, 100000), (10, 100, 1000)):
             dr = date_range(start='2015-08-27', periods=n // 10, freq='T')