diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index e72f7c6c6a6bf..9748a594a9b6f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1379,8 +1379,9 @@ def size(self): """ ids, _, ngroup = self.group_info + ids = com._ensure_platform_int(ids) out = np.bincount(ids[ids != -1], minlength=ngroup) - return Series(out, index=self.result_index) + return Series(out, index=self.result_index, dtype='int64') @cache_readonly def _max_groupsize(self): @@ -1808,15 +1809,17 @@ def indices(self): @cache_readonly def group_info(self): ngroups = self.ngroups - obs_group_ids = np.arange(ngroups, dtype='int64') + obs_group_ids = np.arange(ngroups) rep = np.diff(np.r_[0, self.bins]) + rep = com._ensure_platform_int(rep) if ngroups == len(self.bins): - comp_ids = np.repeat(np.arange(ngroups, dtype='int64'), rep) + comp_ids = np.repeat(np.arange(ngroups), rep) else: - comp_ids = np.repeat(np.r_[-1, np.arange(ngroups, dtype='int64')], rep) + comp_ids = np.repeat(np.r_[-1, np.arange(ngroups)], rep) - return comp_ids, obs_group_ids, ngroups + return comp_ids.astype('int64', copy=False), \ + obs_group_ids.astype('int64', copy=False), ngroups @cache_readonly def ngroups(self): @@ -2565,8 +2568,8 @@ def nunique(self, dropna=True): # group boundries are where group ids change # unique observations are where sorted values change - idx = com._ensure_int64(np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]) - inc = com._ensure_int64(np.r_[1, val[1:] != val[:-1]]) + idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] + inc = np.r_[1, val[1:] != val[:-1]] # 1st item of each group is a new unique observation mask = isnull(val) @@ -2577,7 +2580,7 @@ def nunique(self, dropna=True): inc[mask & np.r_[False, mask[:-1]]] = 0 inc[idx] = 1 - out = np.add.reduceat(inc, idx) + out = np.add.reduceat(inc, idx).astype('int64', copy=False) return Series(out if ids[0] != -1 else out[1:], index=self.grouper.result_index, name=self.name) @@ -2666,6 +2669,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, mi = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) + if com.is_integer_dtype(out): + out = com._ensure_int64(out) return Series(out, index=mi) # for compat. with algos.value_counts need to ensure every @@ -2695,6 +2700,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, mi = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) + if com.is_integer_dtype(out): + out = com._ensure_int64(out) return Series(out, index=mi) def count(self): @@ -2703,9 +2710,10 @@ def count(self): val = self.obj.get_values() mask = (ids != -1) & ~isnull(val) + ids = com._ensure_platform_int(ids) out = np.bincount(ids[mask], minlength=ngroups) if ngroups != 0 else [] - return Series(out, index=self.grouper.result_index, name=self.name) + return Series(out, index=self.grouper.result_index, name=self.name, dtype='int64') def _apply_to_column_groupbys(self, func): """ return a pass thru """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 03b2ea5597ab6..11645311467d5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1137,7 +1137,7 @@ def count(self, level=None): lev = lev.insert(cnt, _get_na_value(lev.dtype.type)) out = np.bincount(lab[notnull(self.values)], minlength=len(lev)) - return self._constructor(out, index=lev).__finalize__(self) + return self._constructor(out, index=lev, dtype='int64').__finalize__(self) def mode(self): """Returns the mode(s) of the dataset. diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 2e7fb2adf2fd4..b94c91f72802a 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -9,6 +9,7 @@ import pandas.lib as lib import pandas._period as period import pandas.algos as algos +from pandas.core import common as com from pandas.tseries.holiday import Holiday, SA, next_monday,USMartinLutherKingJr,USMemorialDay,AbstractHolidayCalendar import datetime from pandas import DateOffset @@ -480,10 +481,10 @@ def test_group_ohlc(): def _check(dtype): obj = np.array(np.random.randn(20),dtype=dtype) - bins = np.array([6, 12, 20], dtype=np.int64) + bins = np.array([6, 12, 20]) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) - labels = np.repeat(np.arange(3, dtype='int64'), np.diff(np.r_[0, bins])) + labels = com._ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) func = getattr(algos,'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 7052348e2e6a4..b48f077bd6f6d 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -936,7 +936,7 @@ def test_resample_group_info(self): # GH10914 mask = np.r_[True, vals[1:] != vals[:-1]] mask |= np.r_[True, bins[1:] != bins[:-1]] - arr = np.bincount(bins[mask] - 1, minlength=len(ix)) + arr = np.bincount(bins[mask] - 1, minlength=len(ix)).astype('int64',copy=False) right = Series(arr, index=ix) assert_series_equal(left, right) @@ -950,7 +950,7 @@ def test_resample_size(self): ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T') bins = np.searchsorted(ix.values, ts.index.values, side='right') - val = np.bincount(bins, minlength=len(ix) + 1)[1:] + val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64',copy=False) right = Series(val, index=ix) assert_series_equal(left, right)