@@ -149,7 +149,7 @@ def _obj_with_exclusions(self):
149
149
150
150
@property
def _group_shape(self):
    """Return ``len(ping.counts)`` for every grouping, as a tuple.

    The tuple has one entry per element of ``self.groupings``, in the
    same order.
    """
    sizes = [len(ping.counts) for ping in self.groupings]
    return tuple(sizes)
153
153
154
154
@property
155
155
def _agg_stride_shape (self ):
@@ -535,24 +535,35 @@ def __init__(self, index, grouper=None, name=None, level=None):
535
535
if isinstance (grouper , Series ) and name is None :
536
536
self .name = grouper .name
537
537
538
+ # pre-computed
539
+ self ._was_factor = False
540
+
538
541
if level is not None :
539
542
if not isinstance (level , int ):
540
543
assert (level in index .names )
541
544
level = index .names .index (level )
542
545
543
546
inds = index .labels [level ]
544
- labels = index .levels [level ].take (inds )
547
+ level_index = index .levels [level ]
548
+
545
549
if self .name is None :
546
550
self .name = index .names [level ]
547
551
552
+ # XXX complete hack
553
+
554
+ level_values = index .levels [level ].take (inds )
548
555
if grouper is not None :
549
- self .grouper = labels .map (self .grouper )
556
+ self .grouper = level_values .map (self .grouper )
550
557
else :
551
- self .grouper = labels
552
-
553
- # no level passed
554
- if not isinstance (self .grouper , np .ndarray ):
555
- self .grouper = self .index .map (self .grouper )
558
+ self ._was_factor = True
559
+ self ._labels = inds
560
+ self ._group_index = level_index
561
+ self ._counts = lib .group_count (inds , len (level_index ))
562
+ self .grouper = level_values
563
+ else :
564
+ # no level passed
565
+ if not isinstance (self .grouper , np .ndarray ):
566
+ self .grouper = self .index .map (self .grouper )
556
567
557
568
def __repr__(self):
    """Return ``Grouping(<name>)`` for this grouping."""
    # Wrap the name in a 1-tuple: a bare tuple-valued name would otherwise
    # be unpacked as several format arguments and raise TypeError (or, for
    # a 1-tuple, silently lose its tuple-ness in the output).
    return 'Grouping(%s)' % (self.name,)
@@ -563,6 +574,7 @@ def __iter__(self):
563
574
# Lazily populated state. Each slot stays None until it is filled in by
# _make_labels(), by the ``ids`` property, or directly in __init__ when the
# grouping is taken straight from an index level.
_labels = None
_ids = None
_counts = None
# Cached group-key index; set in __init__ for level-based groupings,
# otherwise built on first access of the ``group_index`` property.
_group_index = None
566
578
567
579
@cache_readonly
568
580
def indices (self ):
@@ -577,7 +589,11 @@ def labels(self):
577
589
@property
def ids(self):
    """Return the mapping of integer position -> group key, cached in ``_ids``.

    When the grouping was taken directly from an index level
    (``_was_factor`` is true) the mapping is built by enumerating the
    stored ``_group_index``. Otherwise ``_make_labels()`` is called and is
    expected to populate ``_ids``.
    """
    if self._ids is None:
        if self._was_factor:
            # position i in the level index is the key for label i
            self._ids = dict(enumerate(self._group_index))
        else:
            self._make_labels()
    return self._ids
582
598
583
599
@cache_readonly
@@ -590,13 +606,21 @@ def counts(self):
590
606
self ._make_labels ()
591
607
return self ._counts
592
608
593
@property
def group_index(self):
    """Return the Index of group keys, building and caching it on first use.

    If ``_group_index`` was already set (for example in ``__init__`` when
    grouping by an index level) it is returned unchanged. Otherwise it is
    built from ``self.ids`` and stored for later calls.
    """
    if self._group_index is None:
        ids = self.ids
        # Object-dtype array 0..len(ids)-1 of the positions to look up.
        positions = np.arange(len(ids), dtype='O')
        # NOTE(review): lib.lookup_values presumably returns ids[p] for each
        # position p (the replaced code was an explicit per-position lookup
        # into ids) -- confirm against the lib implementation.
        self._group_index = Index(lib.lookup_values(positions, ids))
    return self._group_index
596
616
597
617
def _make_labels (self ):
598
- ids , labels , counts = _group_labels (self .grouper )
599
- sids , slabels , scounts = sort_group_labels (ids , labels , counts )
618
+ if self ._was_factor : # pragma: no cover
619
+ raise Exception ('Should not call this method grouping by level' )
620
+ else :
621
+ ids , labels , counts = _group_labels (self .grouper )
622
+ sids , slabels , scounts = sort_group_labels (ids , labels , counts )
623
+
600
624
self ._labels = slabels
601
625
self ._ids = sids
602
626
self ._counts = scounts
@@ -768,7 +792,12 @@ def _get_index():
768
792
if len (self .groupings ) > 1 :
769
793
index = MultiIndex .from_tuples (keys , names = key_names )
770
794
else :
771
- index = Index (keys , name = key_names [0 ])
795
+ ping = self .groupings [0 ]
796
+ if len (keys ) == len (ping .counts ):
797
+ index = ping .group_index
798
+ index .name = key_names [0 ]
799
+ else :
800
+ index = Index (keys , name = key_names [0 ])
772
801
return index
773
802
774
803
if isinstance (values [0 ], Series ):
@@ -981,7 +1010,10 @@ def _aggregate_generic(self, func, *args, **kwargs):
981
1010
982
1011
index_name = (self .groupings [0 ].name
983
1012
if len (self .groupings ) == 1 else None )
984
- result_index = Index (sorted (result ), name = index_name )
1013
+
1014
+ result_index = self .groupings [0 ].group_index
1015
+
1016
+ # result_index = Index(sorted(result), name=index_name)
985
1017
986
1018
if result :
987
1019
if axis == 0 :
@@ -1062,25 +1094,36 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
1062
1094
not_indexed_same = not_indexed_same )
1063
1095
else :
1064
1096
if len (self .groupings ) > 1 :
1065
- keys = MultiIndex .from_tuples (keys , names = key_names )
1097
+ key_index = MultiIndex .from_tuples (keys , names = key_names )
1066
1098
else :
1067
- keys = Index (keys , name = key_names [0 ])
1099
+ ping = self .groupings [0 ]
1100
+ if len (keys ) == len (ping .counts ):
1101
+ key_index = ping .group_index
1102
+ key_index .name = key_names [0 ]
1103
+
1104
+ key_lookup = Index (keys )
1105
+ indexer = key_lookup .get_indexer (key_index )
1106
+
1107
+ # reorder the values
1108
+ values = [values [i ] for i in indexer ]
1109
+ else :
1110
+ key_index = Index (keys , name = key_names [0 ])
1068
1111
1069
1112
if isinstance (values [0 ], np .ndarray ):
1070
1113
if self .axis == 0 :
1071
1114
stacked_values = np .vstack ([np .asarray (x )
1072
1115
for x in values ])
1073
1116
columns = values [0 ].index
1074
- index = keys
1117
+ index = key_index
1075
1118
else :
1076
1119
stacked_values = np .vstack ([np .asarray (x )
1077
1120
for x in values ]).T
1078
1121
index = values [0 ].index
1079
- columns = keys
1122
+ columns = key_index
1080
1123
return DataFrame (stacked_values , index = index ,
1081
1124
columns = columns )
1082
1125
else :
1083
- return Series (values , index = keys )
1126
+ return Series (values , index = key_index )
1084
1127
1085
1128
def transform (self , func , * args , ** kwargs ):
1086
1129
"""
@@ -1417,6 +1460,11 @@ def _group_labels(values):
1417
1460
1418
1461
def sort_group_labels (ids , labels , counts ):
1419
1462
n = len (ids )
1463
+
1464
+ # corner all NA case
1465
+ if n == 0 :
1466
+ return ids , labels , counts
1467
+
1420
1468
rng = np .arange (n )
1421
1469
values = Series (ids , index = rng , dtype = object ).values
1422
1470
indexer = values .argsort ()
0 commit comments