Skip to content

Commit e67a981

Browse files
committed
Merge pull request #10918 from behzadnouri/bin-gr-info
BUG: closes bug in BinGrouper.group_info where returned values are not compatible with base class
2 parents af5089e + c42ba30 commit e67a981

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,7 @@ Bug Fixes
747747
- Bug in ``Index`` construction with a mixed list of tuples (:issue:`10697`)
748748
- Bug in ``DataFrame.reset_index`` when index contains ``NaT`` (:issue:`10388`)
749749
- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`)
750+
- Bug in ``BinGrouper.group_info`` where returned values are not compatible with base class (:issue:`10914`)
750751

751752

752753
- Bug causing ``DataFrame.where`` to not respect the ``axis`` parameter when the frame has a symmetric shape. (:issue:`9736`)

pandas/core/groupby.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1790,8 +1790,10 @@ def indices(self):
17901790

17911791
@cache_readonly
17921792
def group_info(self):
1793-
# for compat
1794-
return self.bins, self.binlabels, self.ngroups
1793+
ngroups = self.ngroups
1794+
obs_group_ids = np.arange(ngroups)
1795+
comp_ids = np.repeat(np.arange(ngroups), np.diff(np.r_[0, self.bins]))
1796+
return comp_ids, obs_group_ids, ngroups
17951797

17961798
@cache_readonly
17971799
def ngroups(self):

pandas/tseries/tests/test_resample.py

+25
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,31 @@ def test_resample_timegrouper(self):
916916
result = df.groupby(pd.Grouper(freq='M', key='A')).count()
917917
assert_frame_equal(result, expected)
918918

919+
def test_resample_group_info(self): # GH10914
920+
for n, k in product((10000, 100000), (10, 100, 1000)):
921+
dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
922+
ts = Series(np.random.randint(0, n // k, n),
923+
index=np.random.choice(dr, n))
924+
925+
left = ts.resample('30T', how='nunique')
926+
ix = date_range(start=ts.index.min(),
927+
end=ts.index.max(),
928+
freq='30T')
929+
930+
vals = ts.values
931+
bins = np.searchsorted(ix.values, ts.index, side='right')
932+
933+
sorter = np.lexsort((vals, bins))
934+
vals, bins = vals[sorter], bins[sorter]
935+
936+
mask = np.r_[True, vals[1:] != vals[:-1]]
937+
mask |= np.r_[True, bins[1:] != bins[:-1]]
938+
939+
arr = np.bincount(bins[mask] - 1, minlength=len(ix))
940+
right = Series(arr, index=ix)
941+
942+
assert_series_equal(left, right)
943+
919944
def test_resmaple_dst_anchor(self):
920945
# 5172
921946
dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern')

0 commit comments

Comments
 (0)