@@ -82,18 +82,10 @@ def __init__(self, values, index, level=-1, value_columns=None):
82
82
83
83
self .level = self .index ._get_level_number (level )
84
84
85
- levels = index .levels
86
- labels = index .labels
87
-
88
- def _make_index (lev , lab ):
89
- values = _make_index_array_level (lev .values , lab )
90
- i = lev ._simple_new (values , lev .name ,
91
- freq = getattr (lev , 'freq' , None ),
92
- tz = getattr (lev , 'tz' , None ))
93
- return i
94
-
95
- self .new_index_levels = [_make_index (lev , lab )
96
- for lev , lab in zip (levels , labels )]
85
+ # when index includes `nan`, need to lift levels/strides by 1
86
+ self .lift = 1 if - 1 in self .index .labels [self .level ] else 0
87
+
88
+ self .new_index_levels = list (index .levels )
97
89
self .new_index_names = list (index .names )
98
90
99
91
self .removed_name = self .new_index_names .pop (self .level )
@@ -134,10 +126,10 @@ def _make_selectors(self):
134
126
ngroups = len (obs_ids )
135
127
136
128
comp_index = _ensure_platform_int (comp_index )
137
- stride = self .index .levshape [self .level ]
129
+ stride = self .index .levshape [self .level ] + self . lift
138
130
self .full_shape = ngroups , stride
139
131
140
- selector = self .sorted_labels [- 1 ] + stride * comp_index
132
+ selector = self .sorted_labels [- 1 ] + stride * comp_index + self . lift
141
133
mask = np .zeros (np .prod (self .full_shape ), dtype = bool )
142
134
mask .put (selector , True )
143
135
@@ -166,20 +158,6 @@ def get_result(self):
166
158
values = com .take_nd (values , inds , axis = 1 )
167
159
columns = columns [inds ]
168
160
169
- # we might have a missing index
170
- if len (index ) != values .shape [0 ]:
171
- mask = isnull (index )
172
- if mask .any ():
173
- l = np .arange (len (index ))
174
- values , orig_values = (np .empty ((len (index ), values .shape [1 ])),
175
- values )
176
- values .fill (np .nan )
177
- values_indexer = com ._ensure_int64 (l [~ mask ])
178
- for i , j in enumerate (values_indexer ):
179
- values [j ] = orig_values [i ]
180
- else :
181
- index = index .take (self .unique_groups )
182
-
183
161
# may need to coerce categoricals here
184
162
if self .is_categorical is not None :
185
163
values = [ Categorical .from_array (values [:,i ],
@@ -220,9 +198,16 @@ def get_new_values(self):
220
198
221
199
def get_new_columns (self ):
222
200
if self .value_columns is None :
223
- return self .removed_level
201
+ if self .lift == 0 :
202
+ return self .removed_level
203
+
204
+ lev = self .removed_level
205
+ vals = np .insert (lev .astype ('object' ), 0 ,
206
+ _get_na_value (lev .dtype .type ))
207
+
208
+ return lev ._shallow_copy (vals )
224
209
225
- stride = len (self .removed_level )
210
+ stride = len (self .removed_level ) + self . lift
226
211
width = len (self .value_columns )
227
212
propagator = np .repeat (np .arange (width ), stride )
228
213
if isinstance (self .value_columns , MultiIndex ):
@@ -231,59 +216,34 @@ def get_new_columns(self):
231
216
232
217
new_labels = [lab .take (propagator )
233
218
for lab in self .value_columns .labels ]
234
- new_labels .append (np .tile (np .arange (stride ), width ))
235
219
else :
236
220
new_levels = [self .value_columns , self .removed_level ]
237
221
new_names = [self .value_columns .name , self .removed_name ]
222
+ new_labels = [propagator ]
238
223
239
- new_labels = []
240
-
241
- new_labels .append (propagator )
242
- new_labels .append (np .tile (np .arange (stride ), width ))
243
-
224
+ new_labels .append (np .tile (np .arange (stride ) - self .lift , width ))
244
225
return MultiIndex (levels = new_levels , labels = new_labels ,
245
226
names = new_names , verify_integrity = False )
246
227
247
228
def get_new_index (self ):
248
- result_labels = []
249
- for cur in self .sorted_labels [:- 1 ]:
250
- labels = cur .take (self .compressor )
251
- labels = _make_index_array_level (labels , cur )
252
- result_labels .append (labels )
229
+ result_labels = [lab .take (self .compressor )
230
+ for lab in self .sorted_labels [:- 1 ]]
253
231
254
232
# construct the new index
255
233
if len (self .new_index_levels ) == 1 :
256
- new_index = self .new_index_levels [0 ]
257
- new_index .name = self .new_index_names [0 ]
258
- else :
259
- new_index = MultiIndex (levels = self .new_index_levels ,
260
- labels = result_labels ,
261
- names = self .new_index_names ,
262
- verify_integrity = False )
263
-
264
- return new_index
234
+ lev , lab = self .new_index_levels [0 ], result_labels [0 ]
235
+ if not (lab == - 1 ).any ():
236
+ return lev .take (lab )
265
237
238
+ vals = np .insert (lev .astype ('object' ), len (lev ),
239
+ _get_na_value (lev .dtype .type )).take (lab )
266
240
267
- def _make_index_array_level (lev , lab ):
268
- """ create the combined index array, preserving nans, return an array """
269
- mask = lab == - 1
270
- if not mask .any ():
271
- return lev
272
-
273
- l = np .arange (len (lab ))
274
- mask_labels = np .empty (len (mask [mask ]), dtype = object )
275
- mask_labels .fill (_get_na_value (lev .dtype .type ))
276
- mask_indexer = com ._ensure_int64 (l [mask ])
277
-
278
- labels = lev
279
- labels_indexer = com ._ensure_int64 (l [~ mask ])
280
-
281
- new_labels = np .empty (tuple ([len (lab )]), dtype = object )
282
- new_labels [labels_indexer ] = labels
283
- new_labels [mask_indexer ] = mask_labels
284
-
285
- return new_labels
241
+ return lev ._shallow_copy (vals )
286
242
243
+ return MultiIndex (levels = self .new_index_levels ,
244
+ labels = result_labels ,
245
+ names = self .new_index_names ,
246
+ verify_integrity = False )
287
247
288
248
def _unstack_multiple (data , clocs ):
289
249
if len (clocs ) == 0 :
@@ -483,29 +443,10 @@ def _unstack_frame(obj, level):
483
443
484
444
485
445
def get_compressed_ids (labels , sizes ):
486
- # no overflow
487
- if com ._long_prod (sizes ) < 2 ** 63 :
488
- group_index = get_group_index (labels , sizes )
489
- comp_index , obs_ids = _compress_group_index (group_index )
490
- else :
491
- n = len (labels [0 ])
492
- mask = np .zeros (n , dtype = bool )
493
- for v in labels :
494
- mask |= v < 0
495
-
496
- while com ._long_prod (sizes ) >= 2 ** 63 :
497
- i = len (sizes )
498
- while com ._long_prod (sizes [:i ]) >= 2 ** 63 :
499
- i -= 1
500
-
501
- rem_index , rem_ids = get_compressed_ids (labels [:i ],
502
- sizes [:i ])
503
- sizes = [len (rem_ids )] + sizes [i :]
504
- labels = [rem_index ] + labels [i :]
505
-
506
- return get_compressed_ids (labels , sizes )
446
+ from pandas .core .groupby import get_flat_ids
507
447
508
- return comp_index , obs_ids
448
+ ids = get_flat_ids (labels , sizes , True )
449
+ return _compress_group_index (ids , sort = True )
509
450
510
451
511
452
def stack (frame , level = - 1 , dropna = True ):
0 commit comments