@@ -1379,8 +1379,9 @@ def size(self):
1379
1379
1380
1380
"""
1381
1381
ids , _ , ngroup = self .group_info
1382
+ ids = com ._ensure_platform_int (ids )
1382
1383
out = np .bincount (ids [ids != - 1 ], minlength = ngroup )
1383
- return Series (out , index = self .result_index )
1384
+ return Series (out , index = self .result_index , dtype = 'int64' )
1384
1385
1385
1386
@cache_readonly
1386
1387
def _max_groupsize (self ):
@@ -1808,15 +1809,17 @@ def indices(self):
1808
1809
@cache_readonly
1809
1810
def group_info (self ):
1810
1811
ngroups = self .ngroups
1811
- obs_group_ids = np .arange (ngroups , dtype = 'int64' )
1812
+ obs_group_ids = np .arange (ngroups )
1812
1813
rep = np .diff (np .r_ [0 , self .bins ])
1813
1814
1815
+ rep = com ._ensure_platform_int (rep )
1814
1816
if ngroups == len (self .bins ):
1815
- comp_ids = np .repeat (np .arange (ngroups , dtype = 'int64' ), rep )
1817
+ comp_ids = np .repeat (np .arange (ngroups ), rep )
1816
1818
else :
1817
- comp_ids = np .repeat (np .r_ [- 1 , np .arange (ngroups , dtype = 'int64' )], rep )
1819
+ comp_ids = np .repeat (np .r_ [- 1 , np .arange (ngroups )], rep )
1818
1820
1819
- return comp_ids , obs_group_ids , ngroups
1821
+ return comp_ids .astype ('int64' , copy = False ), \
1822
+ obs_group_ids .astype ('int64' , copy = False ), ngroups
1820
1823
1821
1824
@cache_readonly
1822
1825
def ngroups (self ):
@@ -2565,8 +2568,8 @@ def nunique(self, dropna=True):
2565
2568
2566
2569
# group boundaries are where group ids change
2567
2570
# unique observations are where sorted values change
2568
- idx = com . _ensure_int64 ( np .r_ [0 , 1 + np .nonzero (ids [1 :] != ids [:- 1 ])[0 ]])
2569
- inc = com . _ensure_int64 ( np .r_ [1 , val [1 :] != val [:- 1 ]])
2571
+ idx = np .r_ [0 , 1 + np .nonzero (ids [1 :] != ids [:- 1 ])[0 ]]
2572
+ inc = np .r_ [1 , val [1 :] != val [:- 1 ]]
2570
2573
2571
2574
# 1st item of each group is a new unique observation
2572
2575
mask = isnull (val )
@@ -2577,7 +2580,7 @@ def nunique(self, dropna=True):
2577
2580
inc [mask & np .r_ [False , mask [:- 1 ]]] = 0
2578
2581
inc [idx ] = 1
2579
2582
2580
- out = np .add .reduceat (inc , idx )
2583
+ out = np .add .reduceat (inc , idx ). astype ( 'int64' , copy = False )
2581
2584
return Series (out if ids [0 ] != - 1 else out [1 :],
2582
2585
index = self .grouper .result_index ,
2583
2586
name = self .name )
@@ -2666,6 +2669,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
2666
2669
mi = MultiIndex (levels = levels , labels = labels , names = names ,
2667
2670
verify_integrity = False )
2668
2671
2672
+ if com .is_integer_dtype (out ):
2673
+ out = com ._ensure_int64 (out )
2669
2674
return Series (out , index = mi )
2670
2675
2671
2676
# for compat. with algos.value_counts need to ensure every
@@ -2695,6 +2700,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
2695
2700
mi = MultiIndex (levels = levels , labels = labels , names = names ,
2696
2701
verify_integrity = False )
2697
2702
2703
+ if com .is_integer_dtype (out ):
2704
+ out = com ._ensure_int64 (out )
2698
2705
return Series (out , index = mi )
2699
2706
2700
2707
def count (self ):
@@ -2703,9 +2710,10 @@ def count(self):
2703
2710
val = self .obj .get_values ()
2704
2711
2705
2712
mask = (ids != - 1 ) & ~ isnull (val )
2713
+ ids = com ._ensure_platform_int (ids )
2706
2714
out = np .bincount (ids [mask ], minlength = ngroups ) if ngroups != 0 else []
2707
2715
2708
- return Series (out , index = self .grouper .result_index , name = self .name )
2716
+ return Series (out , index = self .grouper .result_index , name = self .name , dtype = 'int64' )
2709
2717
2710
2718
def _apply_to_column_groupbys (self , func ):
2711
2719
""" return a pass thru """
0 commit comments