@@ -223,14 +223,13 @@ def __iter__(self):
     def _multi_iter(self):
         data = self.obj
         group_index = self._group_index
-        comp_ids, _, ngroups = _compress_group_index(group_index)
+        comp_ids, obs_ids = _compress_group_index(group_index)
+        ngroups = len(obs_ids)
         label_list = [ping.labels for ping in self.groupings]
         level_list = [ping.group_index for ping in self.groupings]
         mapper = _KeyMapper(comp_ids, ngroups, label_list, level_list)
 
         for label, group in self._generate_groups(data, comp_ids, ngroups):
-            if group is None:
-                continue
             key = mapper.get_key(label)
             yield key, group
 
@@ -335,7 +334,8 @@ def _cython_agg_general(self, how):
         # aggregate all the columns at once?)
 
         group_index = self._group_index
-        comp_ids, obs_group_ids, max_group = _compress_group_index(group_index)
+        comp_ids, obs_group_ids = _compress_group_index(group_index)
+        max_group = len(obs_group_ids)
 
         output = {}
         for name, obj in self._iterate_slices():
@@ -355,6 +355,32 @@ def _cython_agg_general(self, how):
 
         return self._wrap_aggregated_output(output, mask, obs_group_ids)
 
+    def _python_agg_general(self, func, *args, **kwargs):
+        agg_func = lambda x: func(x, *args, **kwargs)
+
+        group_index = self._group_index
+        comp_ids, obs_group_ids = _compress_group_index(group_index)
+        max_group = len(obs_group_ids)
+
+        # iterate through "columns" ex exclusions to populate output dict
+        output = {}
+        for name, obj in self._iterate_slices():
+            try:
+                result, counts = self._aggregate_series(obj, agg_func,
+                                                        comp_ids, max_group)
+                output[name] = result
+            except TypeError:
+                continue
+
+        if len(output) == 0:
+            return self._python_apply_general(func, *args, **kwargs)
+
+        mask = counts.ravel() > 0
+        for name, result in output.iteritems():
+            output[name] = result[mask]
+
+        return self._wrap_aggregated_output(output, mask, obs_group_ids)
+
     @property
     def _group_index(self):
         result = get_group_index([ping.labels for ping in self.groupings],
@@ -380,31 +406,6 @@ def _get_group_levels(self, mask, obs_ids):
 
         return name_list
 
-    def _python_agg_general(self, func, *args, **kwargs):
-        agg_func = lambda x: func(x, *args, **kwargs)
-
-        group_index = self._group_index
-        comp_ids, obs_group_ids, max_group = _compress_group_index(group_index)
-
-        # iterate through "columns" ex exclusions to populate output dict
-        output = {}
-        for name, obj in self._iterate_slices():
-            try:
-                result, counts = self._aggregate_series(obj, agg_func,
-                                                        comp_ids, max_group)
-                output[name] = result
-            except TypeError:
-                continue
-
-        if len(output) == 0:
-            return self._python_apply_general(func, *args, **kwargs)
-
-        mask = counts.ravel() > 0
-        for name, result in output.iteritems():
-            output[name] = result[mask]
-
-        return self._wrap_aggregated_output(output, mask, obs_group_ids)
-
     def _aggregate_series(self, obj, func, group_index, ngroups):
         try:
             return self._aggregate_series_fast(obj, func, group_index, ngroups)
@@ -431,8 +432,6 @@ def _aggregate_series_pure_python(self, obj, func, group_index, ngroups):
         result = None
 
         for label, group in self._generate_groups(obj, group_index, ngroups):
-            if group is None:
-                continue
             res = func(group)
             if result is None:
                 try:
@@ -597,7 +596,6 @@ def __iter__(self):
         return iter(self.indices)
 
     _labels = None
-    _ids = None
     _counts = None
     _group_index = None
 
@@ -615,13 +613,6 @@ def labels(self):
             self._make_labels()
         return self._labels
 
-    @property
-    def ids(self):
-        if self._ids is None:
-            index = self.group_index
-            self._ids = dict(zip(range(len(index)), index))
-        return self._ids
-
     @property
     def counts(self):
         if self._counts is None:
@@ -1297,10 +1288,11 @@ def _get_slice(slob):
                              ngroups)
 
     for i, (start, end) in enumerate(zip(starts, ends)):
-        if start == end:
-            yield i, None
-        else:
-            yield i, _get_slice(slice(start, end))
+        # Since I'm now compressing the group ids, it's now not "possible" to
+        # produce empty slices because such groups would not be observed in the
+        # data
+        assert(start < end)
+        yield i, _get_slice(slice(start, end))
 
 def get_group_index(label_list, shape):
     if len(label_list) == 1:
@@ -1390,7 +1382,6 @@ def _compress_group_index(group_index, sort=True):
 
     group_index = _ensure_int64(group_index)
     comp_ids = table.get_labels_groupby(group_index, uniques)
-    max_group = len(uniques)
 
     # these are the ones we observed
    obs_group_ids = np.array(uniques, dtype='i8')
@@ -1406,7 +1397,7 @@ def _compress_group_index(group_index, sort=True):
 
     obs_group_ids = obs_group_ids.take(sorter)
 
-    return comp_ids, obs_group_ids, max_group
+    return comp_ids, obs_group_ids
 
 def _groupby_indices(values):
     if values.dtype != np.object_: