Skip to content

Commit d19c5fe

Browse files
committed
BUG: resample with nunique
closes pandas-dev#12352
1 parent 286d304 commit d19c5fe

File tree

3 files changed

+39
-3
lines changed

3 files changed

+39
-3
lines changed

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ Bug Fixes
10351035
- Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`, :issue:`11755`, :issue:`12217`)
10361036
- Bug in ``pd.read_stata`` with version <= 108 files (:issue:`12232`)
10371037
- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`)
1038-
1038+
- Bug in resampling with ``.nunique`` and a sparse index (:issue:`12352`)
10391039

10401040
- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`)
10411041
- Bug in subtraction of ``Series`` of a single tz-aware ``Timestamp`` (:issue:`12290`)

pandas/core/groupby.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -2817,8 +2817,16 @@ def nunique(self, dropna=True):
28172817
inc[idx] = 1
28182818

28192819
out = np.add.reduceat(inc, idx).astype('int64', copy=False)
2820-
return Series(out if ids[0] != -1 else out[1:],
2821-
index=self.grouper.result_index,
2820+
res = out if ids[0] != -1 else out[1:]
2821+
ri = self.grouper.result_index
2822+
2823+
# we might have duplications among the bins
2824+
if len(res) != len(ri):
2825+
res, out = np.zeros(len(ri), dtype=out.dtype), res
2826+
res[ids] = out
2827+
2828+
return Series(res,
2829+
index=ri,
28222830
name=self.name)
28232831

28242832
@deprecate_kwarg('take_last', 'keep',

pandas/tseries/tests/test_resample.py

+28
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,34 @@ def test_resample_timegrouper(self):
15261526
result = df.groupby(pd.Grouper(freq='M', key='A')).count()
15271527
assert_frame_equal(result, expected)
15281528

1529+
def test_resample_nunique(self):
    # GH 12352
    # ``nunique`` through resample / time-based groupby must agree with
    # applying ``Series.nunique`` per bin. The two rows below are three
    # days apart, so a daily frequency yields bins that hold no rows.
    df = DataFrame({
        'ID': {pd.Timestamp('2015-06-05 00:00:00'): '0010100903',
               pd.Timestamp('2015-06-08 00:00:00'): '0010150847'},
        'DATE': {pd.Timestamp('2015-06-05 00:00:00'): '2015-06-05',
                 pd.Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
    r = df.resample('D')
    g = df.groupby(pd.Grouper(freq='D'))

    # reference result: per-bin ``nunique`` computed via ``apply``
    expected = df.groupby(pd.TimeGrouper('D')).ID.apply(lambda x:
                                                        x.nunique())
    self.assertEqual(expected.name, 'ID')

    # check both entry points; use the loop variable so that the
    # groupby path (``g``) is exercised as well as the resampler (``r``)
    for t in [r, g]:
        result = t.ID.nunique()
        assert_series_equal(result, expected)

    # TODO
    # this should have name
    # https://github.com/pydata/pandas/issues/12363
    expected.name = None
    result = df.ID.resample('D').nunique()
    assert_series_equal(result, expected)

    result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
    assert_series_equal(result, expected)
1556+
15291557
def test_resample_group_info(self): # GH10914
15301558
for n, k in product((10000, 100000), (10, 100, 1000)):
15311559
dr = date_range(start='2015-08-27', periods=n // 10, freq='T')

0 commit comments

Comments
 (0)