10
10
11
11
from pandas .core .categorical import Categorical
12
12
from pandas .core .common import (notnull , _ensure_platform_int , _maybe_promote ,
13
- _maybe_upcast )
13
+ _maybe_upcast , isnull )
14
14
from pandas .core .groupby import (get_group_index , _compress_group_index ,
15
15
decons_group_index )
16
16
import pandas .core .common as com
17
17
import pandas .algos as algos
18
-
18
+ from pandas import lib
19
19
20
20
from pandas .core .index import MultiIndex , Index
21
21
@@ -67,7 +67,14 @@ def __init__(self, values, index, level=-1, value_columns=None):
67
67
self .index = index
68
68
self .level = self .index ._get_level_number (level )
69
69
70
- self .new_index_levels = list (index .levels )
70
+ levels = index .levels
71
+ labels = index .labels
72
+ def _make_index (lev ,lab ):
73
+ i = lev .__class__ (_make_index_array_level (lev .values ,lab ))
74
+ i .name = lev .name
75
+ return i
76
+
77
+ self .new_index_levels = list ([ _make_index (lev ,lab ) for lev ,lab in zip (levels ,labels ) ])
71
78
self .new_index_names = list (index .names )
72
79
73
80
self .removed_name = self .new_index_names .pop (self .level )
@@ -140,6 +147,19 @@ def get_result(self):
140
147
values = com .take_nd (values , inds , axis = 1 )
141
148
columns = columns [inds ]
142
149
150
+ # we might have a missing index
151
+ if len (index ) != values .shape [0 ]:
152
+ mask = isnull (index )
153
+ if mask .any ():
154
+ l = np .arange (len (index ))
155
+ values , orig_values = np .empty ((len (index ),values .shape [1 ])), values
156
+ values .fill (np .nan )
157
+ values_indexer = com ._ensure_int64 (l [~ mask ])
158
+ for i , j in enumerate (values_indexer ):
159
+ values [j ] = orig_values [i ]
160
+ else :
161
+ index = index .take (self .unique_groups )
162
+
143
163
return DataFrame (values , index = index , columns = columns )
144
164
145
165
def get_new_values (self ):
@@ -201,11 +221,13 @@ def get_new_columns(self):
201
221
def get_new_index (self ):
202
222
result_labels = []
203
223
for cur in self .sorted_labels [:- 1 ]:
204
- result_labels .append (cur .take (self .compressor ))
224
+ labels = cur .take (self .compressor )
225
+ labels = _make_index_array_level (labels ,cur )
226
+ result_labels .append (labels )
205
227
206
228
# construct the new index
207
229
if len (self .new_index_levels ) == 1 :
208
- new_index = self .new_index_levels [0 ]. take ( self . unique_groups )
230
+ new_index = self .new_index_levels [0 ]
209
231
new_index .name = self .new_index_names [0 ]
210
232
else :
211
233
new_index = MultiIndex (levels = self .new_index_levels ,
@@ -215,6 +237,26 @@ def get_new_index(self):
215
237
return new_index
216
238
217
239
240
+ def _make_index_array_level (lev ,lab ):
241
+ """ create the combined index array, preserving nans, return an array """
242
+ mask = lab == - 1
243
+ if not mask .any ():
244
+ return lev
245
+
246
+ l = np .arange (len (lab ))
247
+ mask_labels = np .empty (len (mask [mask ]),dtype = object )
248
+ mask_labels .fill (np .nan )
249
+ mask_indexer = com ._ensure_int64 (l [mask ])
250
+
251
+ labels = lev
252
+ labels_indexer = com ._ensure_int64 (l [~ mask ])
253
+
254
+ new_labels = np .empty (tuple ([len (lab )]),dtype = object )
255
+ new_labels [labels_indexer ] = labels
256
+ new_labels [mask_indexer ] = mask_labels
257
+
258
+ return new_labels
259
+
218
260
def _unstack_multiple (data , clocs ):
219
261
if len (clocs ) == 0 :
220
262
return data
0 commit comments