@@ -1141,17 +1141,11 @@ def nunique(self, dropna=True):
1141
1141
Number of unique values within each group.
1142
1142
"""
1143
1143
ids , _ , _ = self .grouper .group_info
1144
- # breakpoint()
1145
1144
1146
1145
val = self .obj ._internal_get_values ()
1147
- # breakpoint()
1148
1146
1149
- # # GH 27951
1150
- # breakpoint()
1147
+ # GH 27951
1151
1148
val [isna (val )] = np .datetime64 ("NaT" )
1152
- # mask = notna(val)
1153
- # ids = ids[mask]
1154
- # val = val[mask]
1155
1149
1156
1150
try :
1157
1151
sorter = np .lexsort ((val , ids ))
@@ -1163,29 +1157,22 @@ def nunique(self, dropna=True):
1163
1157
_isna = lambda a : a == - 1
1164
1158
else :
1165
1159
_isna = isna
1166
- # breakpoint()
1167
1160
1168
1161
ids , val = ids [sorter ], val [sorter ]
1169
- # breakpoint()
1170
1162
1171
1163
# group boundaries are where group ids change
1172
1164
# unique observations are where sorted values change
1173
- # idx: ids at which groups change
1174
1165
idx = np .r_ [0 , 1 + np .nonzero (ids [1 :] != ids [:- 1 ])[0 ]]
1175
- # inc: 1 if it's a new value, 0 else
1176
1166
inc = np .r_ [1 , val [1 :] != val [:- 1 ]]
1177
- # breakpoint()
1178
1167
1179
1168
# 1st item of each group is a new unique observation
1180
1169
mask = _isna (val )
1181
1170
if dropna :
1182
- inc [idx ] = 1 # 1st itme of each group is defo unique!
1183
- inc [mask ] = 0 # Nas should be excluded, we don't like them!
1171
+ inc [idx ] = 1
1172
+ inc [mask ] = 0
1184
1173
else :
1185
- inc [mask & np .r_ [False , mask [:- 1 ]]] = 0 # only set it to
1186
- # zero if there are two consecutive nans?
1187
- inc [idx ] = 1 # As before, first of each group is defo unique
1188
- # breakpoint()
1174
+ inc [mask & np .r_ [False , mask [:- 1 ]]] = 0
1175
+ inc [idx ] = 1
1189
1176
1190
1177
out = np .add .reduceat (inc , idx ).astype ("int64" , copy = False )
1191
1178
if len (ids ):
@@ -1199,13 +1186,11 @@ def nunique(self, dropna=True):
1199
1186
else :
1200
1187
res = out [1 :]
1201
1188
ri = self .grouper .result_index
1202
- # breakpoint()
1203
1189
1204
1190
# we might have duplications among the bins
1205
1191
if len (res ) != len (ri ):
1206
1192
res , out = np .zeros (len (ri ), dtype = out .dtype ), res
1207
1193
res [ids [idx ]] = out
1208
- # breakpoint()
1209
1194
1210
1195
return Series (res , index = ri , name = self ._selection_name )
1211
1196
0 commit comments