@@ -471,7 +471,9 @@ def _set_selection_from_grouper(self):
471
471
grp = self .grouper
472
472
if self .as_index and getattr (grp ,'groupings' ,None ) is not None and self .obj .ndim > 1 :
473
473
ax = self .obj ._info_axis
474
- groupers = [ g .name for g in grp .groupings if g .level is None and g .name is not None and g .name in ax ]
474
+ groupers = [g .name for g in grp .groupings
475
+ if g .level is None and g .in_axis ]
476
+
475
477
if len (groupers ):
476
478
self ._group_selection = ax .difference (Index (groupers )).tolist ()
477
479
@@ -1844,6 +1846,8 @@ class Grouping(object):
1844
1846
obj :
1845
1847
name :
1846
1848
level :
1849
+ in_axis : whether the Grouping is a column of self.obj and hence is
1850
+ included in the Groupby.exclusions list
1847
1851
1848
1852
Returns
1849
1853
-------
@@ -1857,14 +1861,15 @@ class Grouping(object):
1857
1861
"""
1858
1862
1859
1863
def __init__ (self , index , grouper = None , obj = None , name = None , level = None ,
1860
- sort = True ):
1864
+ sort = True , in_axis = False ):
1861
1865
1862
1866
self .name = name
1863
1867
self .level = level
1864
1868
self .grouper = _convert_grouper (index , grouper )
1865
1869
self .index = index
1866
1870
self .sort = sort
1867
1871
self .obj = obj
1872
+ self .in_axis = in_axis
1868
1873
1869
1874
# right place for this?
1870
1875
if isinstance (grouper , (Series , Index )) and name is None :
@@ -2096,23 +2101,43 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True):
2096
2101
2097
2102
groupings = []
2098
2103
exclusions = []
2099
- for i , (gpr , level ) in enumerate (zip (keys , levels )):
2100
- name = None
2104
+
2105
+ # if the actual grouper should be obj[key]
2106
+ def is_in_axis (key ):
2107
+ if not _is_label_like (key ):
2108
+ try :
2109
+ obj ._data .items .get_loc (key )
2110
+ except Exception :
2111
+ return False
2112
+
2113
+ return True
2114
+
2115
+ # if the grouper is obj[gpr.name]
2116
+ def is_in_obj (gpr ):
2101
2117
try :
2102
- obj ._data .items .get_loc (gpr )
2103
- in_axis = True
2118
+ return id (gpr ) == id (obj [gpr .name ])
2104
2119
except Exception :
2105
- in_axis = False
2120
+ return False
2121
+
2122
+ for i , (gpr , level ) in enumerate (zip (keys , levels )):
2106
2123
2107
- if _is_label_like (gpr ) or in_axis :
2108
- exclusions .append (gpr )
2109
- name = gpr
2110
- gpr = obj [gpr ]
2124
+ if is_in_obj (gpr ): # df.groupby(df['name'])
2125
+ in_axis , name = True , gpr .name
2126
+ exclusions .append (name )
2127
+
2128
+ elif is_in_axis (gpr ): # df.groupby('name')
2129
+ in_axis , name , gpr = True , gpr , obj [gpr ]
2130
+ exclusions .append (name )
2131
+
2132
+ else :
2133
+ in_axis , name = False , None
2111
2134
2112
2135
if isinstance (gpr , Categorical ) and len (gpr ) != len (obj ):
2113
2136
raise ValueError ("Categorical grouper must have len(grouper) == len(data)" )
2114
2137
2115
- ping = Grouping (group_axis , gpr , obj = obj , name = name , level = level , sort = sort )
2138
+ ping = Grouping (group_axis , gpr , obj = obj , name = name ,
2139
+ level = level , sort = sort , in_axis = in_axis )
2140
+
2116
2141
groupings .append (ping )
2117
2142
2118
2143
if len (groupings ) == 0 :
@@ -2647,18 +2672,7 @@ def aggregate(self, arg, *args, **kwargs):
2647
2672
result = self ._aggregate_generic (arg , * args , ** kwargs )
2648
2673
2649
2674
if not self .as_index :
2650
- if isinstance (result .index , MultiIndex ):
2651
- zipped = zip (result .index .levels , result .index .labels ,
2652
- result .index .names )
2653
- for i , (lev , lab , name ) in enumerate (zipped ):
2654
- result .insert (i , name ,
2655
- com .take_nd (lev .values , lab ,
2656
- allow_fill = False ))
2657
- result = result .consolidate ()
2658
- else :
2659
- values = result .index .values
2660
- name = self .grouper .groupings [0 ].name
2661
- result .insert (0 , name , values )
2675
+ self ._insert_inaxis_grouper_inplace (result )
2662
2676
result .index = np .arange (len (result ))
2663
2677
2664
2678
return result .convert_objects ()
@@ -3180,6 +3194,17 @@ def _get_data_to_aggregate(self):
3180
3194
else :
3181
3195
return obj ._data , 1
3182
3196
3197
+ def _insert_inaxis_grouper_inplace (self , result ):
3198
+ # zip in reverse so we can always insert at loc 0
3199
+ izip = zip (* map (reversed , (
3200
+ self .grouper .names ,
3201
+ self .grouper .get_group_levels (),
3202
+ [grp .in_axis for grp in self .grouper .groupings ])))
3203
+
3204
+ for name , lev , in_axis in izip :
3205
+ if in_axis :
3206
+ result .insert (0 , name , lev )
3207
+
3183
3208
def _wrap_aggregated_output (self , output , names = None ):
3184
3209
agg_axis = 0 if self .axis == 1 else 1
3185
3210
agg_labels = self ._obj_with_exclusions ._get_axis (agg_axis )
@@ -3188,11 +3213,7 @@ def _wrap_aggregated_output(self, output, names=None):
3188
3213
3189
3214
if not self .as_index :
3190
3215
result = DataFrame (output , columns = output_keys )
3191
- group_levels = self .grouper .get_group_levels ()
3192
- zipped = zip (self .grouper .names , group_levels )
3193
-
3194
- for i , (name , labels ) in enumerate (zipped ):
3195
- result .insert (i , name , labels )
3216
+ self ._insert_inaxis_grouper_inplace (result )
3196
3217
result = result .consolidate ()
3197
3218
else :
3198
3219
index = self .grouper .result_index
@@ -3209,11 +3230,7 @@ def _wrap_agged_blocks(self, items, blocks):
3209
3230
mgr = BlockManager (blocks , [items , index ])
3210
3231
result = DataFrame (mgr )
3211
3232
3212
- group_levels = self .grouper .get_group_levels ()
3213
- zipped = zip (self .grouper .names , group_levels )
3214
-
3215
- for i , (name , labels ) in enumerate (zipped ):
3216
- result .insert (i , name , labels )
3233
+ self ._insert_inaxis_grouper_inplace (result )
3217
3234
result = result .consolidate ()
3218
3235
else :
3219
3236
index = self .grouper .result_index
0 commit comments