1
1
# pylint: disable=E1101,E1103,W0232
2
+ from collections import OrderedDict
2
3
import datetime
3
4
from sys import getsizeof
4
5
import warnings
18
19
is_integer , is_iterator , is_list_like , is_object_dtype , is_scalar ,
19
20
pandas_dtype )
20
21
from pandas .core .dtypes .dtypes import ExtensionDtype , PandasExtensionDtype
22
+ from pandas .core .dtypes .generic import ABCDataFrame
21
23
from pandas .core .dtypes .missing import array_equivalent , isna
22
24
23
25
import pandas .core .algorithms as algos
@@ -125,25 +127,25 @@ class MultiIndex(Index):
125
127
Parameters
126
128
----------
127
129
levels : sequence of arrays
128
- The unique labels for each level
130
+ The unique labels for each level.
129
131
codes : sequence of arrays
130
- Integers for each level designating which label at each location
132
+ Integers for each level designating which label at each location.
131
133
132
134
.. versionadded:: 0.24.0
133
135
labels : sequence of arrays
134
- Integers for each level designating which label at each location
136
+ Integers for each level designating which label at each location.
135
137
136
138
.. deprecated:: 0.24.0
137
139
Use ``codes`` instead.
138
140
sortorder : optional int
139
141
Level of sortedness (must be lexicographically sorted by that
140
- level)
142
+ level).
141
143
names : optional sequence of objects
142
- Names for each of the index levels. (name is accepted for compat)
143
- copy : boolean , default False
144
- Copy the meta-data
145
- verify_integrity : boolean , default True
146
- Check that the levels/codes are consistent and valid
144
+ Names for each of the index levels (``name`` is accepted for compat).
145
+ copy : bool , default False
146
+ Copy the meta-data.
147
+ verify_integrity : bool , default True
148
+ Check that the levels/codes are consistent and valid.
147
149
148
150
Attributes
149
151
----------
@@ -158,6 +160,7 @@ class MultiIndex(Index):
158
160
from_arrays
159
161
from_tuples
160
162
from_product
163
+ from_frame
161
164
set_levels
162
165
set_codes
163
166
to_frame
@@ -175,13 +178,9 @@ class MultiIndex(Index):
175
178
MultiIndex.from_product : Create a MultiIndex from the cartesian product
176
179
of iterables.
177
180
MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
181
+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
178
182
Index : The base pandas Index type.
179
183
180
- Notes
181
- -----
182
- See the `user guide
183
- <http://pandas.pydata.org/pandas-docs/stable/advanced.html>`_ for more.
184
-
185
184
Examples
186
185
---------
187
186
A new ``MultiIndex`` is typically constructed using one of the helper
@@ -196,6 +195,11 @@ class MultiIndex(Index):
196
195
197
196
See further examples for how to construct a MultiIndex in the doc strings
198
197
of the mentioned helper methods.
198
+
199
+ Notes
200
+ -----
201
+ See the `user guide
202
+ <http://pandas.pydata.org/pandas-docs/stable/advanced.html>`_ for more.
199
203
"""
200
204
201
205
# initialize to zero-length tuples to make everything work
@@ -288,7 +292,7 @@ def _verify_integrity(self, codes=None, levels=None):
288
292
@classmethod
289
293
def from_arrays (cls , arrays , sortorder = None , names = None ):
290
294
"""
291
- Convert arrays to MultiIndex
295
+ Convert arrays to MultiIndex.
292
296
293
297
Parameters
294
298
----------
@@ -297,7 +301,9 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
297
301
len(arrays) is the number of levels.
298
302
sortorder : int or None
299
303
Level of sortedness (must be lexicographically sorted by that
300
- level)
304
+ level).
305
+ names : list / sequence of str, optional
306
+ Names for the levels in the index.
301
307
302
308
Returns
303
309
-------
@@ -308,11 +314,15 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
308
314
MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
309
315
MultiIndex.from_product : Make a MultiIndex from cartesian product
310
316
of iterables.
317
+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
311
318
312
319
Examples
313
320
--------
314
321
>>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
315
322
>>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
323
+ MultiIndex(levels=[[1, 2], ['blue', 'red']],
324
+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
325
+ names=['number', 'color'])
316
326
"""
317
327
if not is_list_like (arrays ):
318
328
raise TypeError ("Input must be a list / sequence of array-likes." )
@@ -337,31 +347,37 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
337
347
@classmethod
338
348
def from_tuples (cls , tuples , sortorder = None , names = None ):
339
349
"""
340
- Convert list of tuples to MultiIndex
350
+ Convert list of tuples to MultiIndex.
341
351
342
352
Parameters
343
353
----------
344
354
tuples : list / sequence of tuple-likes
345
355
Each tuple is the index of one row/column.
346
356
sortorder : int or None
347
357
Level of sortedness (must be lexicographically sorted by that
348
- level)
358
+ level).
359
+ names : list / sequence of str, optional
360
+ Names for the levels in the index.
349
361
350
362
Returns
351
363
-------
352
364
index : MultiIndex
353
365
354
366
See Also
355
367
--------
356
- MultiIndex.from_arrays : Convert list of arrays to MultiIndex
368
+ MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
357
369
MultiIndex.from_product : Make a MultiIndex from cartesian product
358
- of iterables
370
+ of iterables.
371
+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
359
372
360
373
Examples
361
374
--------
362
375
>>> tuples = [(1, u'red'), (1, u'blue'),
363
- (2, u'red'), (2, u'blue')]
376
+ ... (2, u'red'), (2, u'blue')]
364
377
>>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
378
+ MultiIndex(levels=[[1, 2], ['blue', 'red']],
379
+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
380
+ names=['number', 'color'])
365
381
"""
366
382
if not is_list_like (tuples ):
367
383
raise TypeError ('Input must be a list / sequence of tuple-likes.' )
@@ -388,7 +404,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
388
404
@classmethod
389
405
def from_product (cls , iterables , sortorder = None , names = None ):
390
406
"""
391
- Make a MultiIndex from the cartesian product of multiple iterables
407
+ Make a MultiIndex from the cartesian product of multiple iterables.
392
408
393
409
Parameters
394
410
----------
@@ -397,7 +413,7 @@ def from_product(cls, iterables, sortorder=None, names=None):
397
413
sortorder : int or None
398
414
Level of sortedness (must be lexicographically sorted by that
399
415
level).
400
- names : list / sequence of strings or None
416
+ names : list / sequence of str, optional
401
417
Names for the levels in the index.
402
418
403
419
Returns
@@ -408,16 +424,17 @@ def from_product(cls, iterables, sortorder=None, names=None):
408
424
--------
409
425
MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
410
426
MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
427
+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
411
428
412
429
Examples
413
430
--------
414
431
>>> numbers = [0, 1, 2]
415
- >>> colors = [u 'green', u 'purple']
432
+ >>> colors = ['green', 'purple']
416
433
>>> pd.MultiIndex.from_product([numbers, colors],
417
- names=['number', 'color'])
418
- MultiIndex(levels=[[0, 1, 2], [u 'green', u 'purple']],
434
+ ... names=['number', 'color'])
435
+ MultiIndex(levels=[[0, 1, 2], ['green', 'purple']],
419
436
labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
420
- names=[u 'number', u 'color'])
437
+ names=['number', 'color'])
421
438
"""
422
439
from pandas .core .arrays .categorical import _factorize_from_iterables
423
440
from pandas .core .reshape .util import cartesian_product
@@ -431,6 +448,68 @@ def from_product(cls, iterables, sortorder=None, names=None):
431
448
codes = cartesian_product (codes )
432
449
return MultiIndex (levels , codes , sortorder = sortorder , names = names )
433
450
451
+ @classmethod
452
+ def from_frame (cls , df , sortorder = None , names = None ):
453
+ """
454
+ Make a MultiIndex from a DataFrame.
455
+
456
+ .. versionadded:: 0.24.0
457
+
458
+ Parameters
459
+ ----------
460
+ df : DataFrame
461
+ DataFrame to be converted to MultiIndex.
462
+ sortorder : int, optional
463
+ Level of sortedness (must be lexicographically sorted by that
464
+ level).
465
+ names : list-like, optional
466
+ If no names are provided, use the column names, or tuple of column
467
+ names if the columns are a MultiIndex. If a sequence, overwrite
468
+ names with the given sequence.
469
+
470
+ Returns
471
+ -------
472
+ MultiIndex
473
+ The MultiIndex representation of the given DataFrame.
474
+
475
+ See Also
476
+ --------
477
+ MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
478
+ MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
479
+ MultiIndex.from_product : Make a MultiIndex from cartesian product
480
+ of iterables.
481
+
482
+ Examples
483
+ --------
484
+ >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
485
+ ... ['NJ', 'Temp'], ['NJ', 'Precip']],
486
+ ... columns=['a', 'b'])
487
+ >>> df
488
+ a b
489
+ 0 HI Temp
490
+ 1 HI Precip
491
+ 2 NJ Temp
492
+ 3 NJ Precip
493
+
494
+ >>> pd.MultiIndex.from_frame(df)
495
+ MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
496
+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
497
+ names=['a', 'b'])
498
+
499
+ Using explicit names instead of the column names:
500
+
501
+ >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
502
+ MultiIndex(levels=[['HI', 'NJ'], ['Precip', 'Temp']],
503
+ labels=[[0, 0, 1, 1], [1, 0, 1, 0]],
504
+ names=['state', 'observation'])
505
+ """
506
+ if not isinstance (df , ABCDataFrame ):
507
+ raise TypeError ("Input must be a DataFrame" )
508
+
509
+ column_names , columns = lzip (* df .iteritems ())
510
+ names = column_names if names is None else names
511
+ return cls .from_arrays (columns , sortorder = sortorder , names = names )
512
+
434
513
# --------------------------------------------------------------------
435
514
436
515
@property
@@ -1386,11 +1465,16 @@ def to_frame(self, index=True, name=None):
1386
1465
else :
1387
1466
idx_names = self .names
1388
1467
1389
- result = DataFrame ({(name or level ):
1390
- self ._get_level_values (level )
1391
- for name , level in
1392
- zip (idx_names , range (len (self .levels )))},
1393
- copy = False )
1468
+ # Build from an OrderedDict so the columns follow the level order
1469
+ result = DataFrame (
1470
+ OrderedDict ([
1471
+ ((level if name is None else name ),
1472
+ self ._get_level_values (level ))
1473
+ for name , level in zip (idx_names , range (len (self .levels )))
1474
+ ]),
1475
+ copy = False
1476
+ )
1477
+
1394
1478
if index :
1395
1479
result .index = self
1396
1480
return result
0 commit comments