Skip to content

Commit 151947c

Browse files
committed
COMPAT: platform_int fixes in groupby ops, #11189
1 parent 3fb802a commit 151947c

File tree

4 files changed

+23
-14
lines changed

4 files changed

+23
-14
lines changed

pandas/core/groupby.py

+17-9
Original file line numberDiff line numberDiff line change
@@ -1379,8 +1379,9 @@ def size(self):
13791379
13801380
"""
13811381
ids, _, ngroup = self.group_info
1382+
ids = com._ensure_platform_int(ids)
13821383
out = np.bincount(ids[ids != -1], minlength=ngroup)
1383-
return Series(out, index=self.result_index)
1384+
return Series(out, index=self.result_index, dtype='int64')
13841385

13851386
@cache_readonly
13861387
def _max_groupsize(self):
@@ -1808,15 +1809,17 @@ def indices(self):
18081809
@cache_readonly
18091810
def group_info(self):
18101811
ngroups = self.ngroups
1811-
obs_group_ids = np.arange(ngroups, dtype='int64')
1812+
obs_group_ids = np.arange(ngroups)
18121813
rep = np.diff(np.r_[0, self.bins])
18131814

1815+
rep = com._ensure_platform_int(rep)
18141816
if ngroups == len(self.bins):
1815-
comp_ids = np.repeat(np.arange(ngroups, dtype='int64'), rep)
1817+
comp_ids = np.repeat(np.arange(ngroups), rep)
18161818
else:
1817-
comp_ids = np.repeat(np.r_[-1, np.arange(ngroups, dtype='int64')], rep)
1819+
comp_ids = np.repeat(np.r_[-1, np.arange(ngroups)], rep)
18181820

1819-
return comp_ids, obs_group_ids, ngroups
1821+
return comp_ids.astype('int64', copy=False), \
1822+
obs_group_ids.astype('int64', copy=False), ngroups
18201823

18211824
@cache_readonly
18221825
def ngroups(self):
@@ -2565,8 +2568,8 @@ def nunique(self, dropna=True):
25652568

25662569
# group boundaries are where group ids change
25672570
# unique observations are where sorted values change
2568-
idx = com._ensure_int64(np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]])
2569-
inc = com._ensure_int64(np.r_[1, val[1:] != val[:-1]])
2571+
idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
2572+
inc = np.r_[1, val[1:] != val[:-1]]
25702573

25712574
# 1st item of each group is a new unique observation
25722575
mask = isnull(val)
@@ -2577,7 +2580,7 @@ def nunique(self, dropna=True):
25772580
inc[mask & np.r_[False, mask[:-1]]] = 0
25782581
inc[idx] = 1
25792582

2580-
out = np.add.reduceat(inc, idx)
2583+
out = np.add.reduceat(inc, idx).astype('int64', copy=False)
25812584
return Series(out if ids[0] != -1 else out[1:],
25822585
index=self.grouper.result_index,
25832586
name=self.name)
@@ -2666,6 +2669,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
26662669
mi = MultiIndex(levels=levels, labels=labels, names=names,
26672670
verify_integrity=False)
26682671

2672+
if com.is_integer_dtype(out):
2673+
out = com._ensure_int64(out)
26692674
return Series(out, index=mi)
26702675

26712676
# for compat. with algos.value_counts need to ensure every
@@ -2695,6 +2700,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
26952700
mi = MultiIndex(levels=levels, labels=labels, names=names,
26962701
verify_integrity=False)
26972702

2703+
if com.is_integer_dtype(out):
2704+
out = com._ensure_int64(out)
26982705
return Series(out, index=mi)
26992706

27002707
def count(self):
@@ -2703,9 +2710,10 @@ def count(self):
27032710
val = self.obj.get_values()
27042711

27052712
mask = (ids != -1) & ~isnull(val)
2713+
ids = com._ensure_platform_int(ids)
27062714
out = np.bincount(ids[mask], minlength=ngroups) if ngroups != 0 else []
27072715

2708-
return Series(out, index=self.grouper.result_index, name=self.name)
2716+
return Series(out, index=self.grouper.result_index, name=self.name, dtype='int64')
27092717

27102718
def _apply_to_column_groupbys(self, func):
27112719
""" return a pass thru """

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1137,7 +1137,7 @@ def count(self, level=None):
11371137
lev = lev.insert(cnt, _get_na_value(lev.dtype.type))
11381138

11391139
out = np.bincount(lab[notnull(self.values)], minlength=len(lev))
1140-
return self._constructor(out, index=lev).__finalize__(self)
1140+
return self._constructor(out, index=lev, dtype='int64').__finalize__(self)
11411141

11421142
def mode(self):
11431143
"""Returns the mode(s) of the dataset.

pandas/tests/test_tseries.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pandas.lib as lib
1010
import pandas._period as period
1111
import pandas.algos as algos
12+
from pandas.core import common as com
1213
from pandas.tseries.holiday import Holiday, SA, next_monday,USMartinLutherKingJr,USMemorialDay,AbstractHolidayCalendar
1314
import datetime
1415
from pandas import DateOffset
@@ -480,10 +481,10 @@ def test_group_ohlc():
480481
def _check(dtype):
481482
obj = np.array(np.random.randn(20),dtype=dtype)
482483

483-
bins = np.array([6, 12, 20], dtype=np.int64)
484+
bins = np.array([6, 12, 20])
484485
out = np.zeros((3, 4), dtype)
485486
counts = np.zeros(len(out), dtype=np.int64)
486-
labels = np.repeat(np.arange(3, dtype='int64'), np.diff(np.r_[0, bins]))
487+
labels = com._ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))
487488

488489
func = getattr(algos,'group_ohlc_%s' % dtype)
489490
func(out, counts, obj[:, None], labels)

pandas/tseries/tests/test_resample.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,7 @@ def test_resample_group_info(self): # GH10914
936936
mask = np.r_[True, vals[1:] != vals[:-1]]
937937
mask |= np.r_[True, bins[1:] != bins[:-1]]
938938

939-
arr = np.bincount(bins[mask] - 1, minlength=len(ix))
939+
arr = np.bincount(bins[mask] - 1, minlength=len(ix)).astype('int64',copy=False)
940940
right = Series(arr, index=ix)
941941

942942
assert_series_equal(left, right)
@@ -950,7 +950,7 @@ def test_resample_size(self):
950950
ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T')
951951

952952
bins = np.searchsorted(ix.values, ts.index.values, side='right')
953-
val = np.bincount(bins, minlength=len(ix) + 1)[1:]
953+
val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64',copy=False)
954954

955955
right = Series(val, index=ix)
956956
assert_series_equal(left, right)

0 commit comments

Comments
 (0)