14
14
from pandas .compat .numpy import function as nv
15
15
from pandas import compat
16
16
17
-
18
17
from pandas .types .common import (_ensure_int64 ,
19
18
_ensure_platform_int ,
20
19
is_object_dtype ,
@@ -73,6 +72,7 @@ class MultiIndex(Index):
73
72
_levels = FrozenList ()
74
73
_labels = FrozenList ()
75
74
_comparables = ['names' ]
75
+ _engine_type = _index .MultiIndexEngine
76
76
rename = Index .set_names
77
77
78
78
def __new__ (cls , levels = None , labels = None , sortorder = None , names = None ,
@@ -114,7 +114,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
114
114
result ._verify_integrity ()
115
115
if _set_identity :
116
116
result ._reset_identity ()
117
-
118
117
return result
119
118
120
119
def _verify_integrity (self , labels = None , levels = None ):
@@ -619,6 +618,10 @@ def _get_level_number(self, level):
619
618
620
619
_tuples = None
621
620
621
@cache_readonly
def _engine(self):
    """Instantiate ``self._engine_type`` for this index."""

    # The engine constructor is handed a zero-argument callable that gives
    # back this index, together with the number of entries.
    def index_getter():
        return self

    n_entries = len(self)
    return self._engine_type(index_getter, n_entries)
624
+
622
625
@property
623
626
def values (self ):
624
627
if self ._tuples is not None :
@@ -655,10 +658,74 @@ def _has_complex_internals(self):
655
658
# to disable groupby tricks
656
659
return True
657
660
661
@cache_readonly
def is_monotonic(self):
    """
    Return True if the entries of the index, compared as tuples, are
    monotonic increasing (only equal or increasing values).

    Returns
    -------
    bool
    """

    # TODO
    # this is unfortunate: we end up tupleizing
    # just to determine monotonicity :<

    # NOTE: there is deliberately no shortcut on ``self.levels[0]``.
    # The order in which a level stores its values says nothing about the
    # order of the entries (e.g. levels ['b', 'a'] with labels [1, 0] give
    # the sorted entries 'a', 'b'), so bailing out when the first level is
    # not monotonic wrongly reports False for such an index.
    return Index(self.values).is_monotonic_increasing
673
+
658
674
@cache_readonly
def is_unique(self):
    """Return True when ``self.duplicated()`` flags no entry at all."""
    any_duplicated = self.duplicated().any()
    return not any_duplicated
661
677
678
@cache_readonly
def _have_mixed_levels(self):
    """ return a list of booleans, one per level, indicating whether the
    inferred type of that level contains 'mixed' """
    flags = []
    for inferred in self._inferred_type_levels:
        flags.append('mixed' in inferred)
    return flags
682
+
683
@cache_readonly
def _inferred_type_levels(self):
    """ return a list with the ``inferred_type`` of every level, in level
    order """
    inferred = []
    for level in self.levels:
        inferred.append(level.inferred_type)
    return inferred
687
+
688
@cache_readonly
def _hashed_values(self):
    """ return the result of ``hash_tuples`` applied to this index
    (a uint64 ndarray of the hashed values) """
    from pandas.tools.hashing import hash_tuples

    hashed = hash_tuples(self)
    return hashed
693
+
694
def _hashed_indexing_key(self, key):
    """
    validate and return the hash for the provided key

    *this is internal for use for the cython routines*

    Parameters
    ----------
    key : string or tuple

    Returns
    -------
    np.uint64

    Raises
    ------
    KeyError
        If ``key`` is a tuple whose length differs from ``self.nlevels``;
        the offending key is attached to the exception.

    Notes
    -----
    we need to stringify if we have mixed levels

    """
    # Fail fast, and name the key, before doing any other work.
    if isinstance(key, tuple) and len(key) != self.nlevels:
        raise KeyError(key)

    from pandas.tools.hashing import hash_tuples

    # a non-tuple key is hashed as-is
    if not isinstance(key, tuple):
        return hash_tuples(key)

    # For levels flagged as mixed, every non-string component is converted
    # with ``str`` before hashing; other components are left untouched.
    key = tuple(
        str(k) if stringify and not isinstance(k, compat.string_types) else k
        for k, stringify in zip(key, self._have_mixed_levels))
    return hash_tuples(key)
728
+
662
729
@deprecate_kwarg ('take_last' , 'keep' , mapping = {True : 'last' ,
663
730
False : 'first' })
664
731
@Appender (base ._shared_docs ['duplicated' ] % ibase ._index_doc_kwargs )
@@ -852,7 +919,8 @@ def to_frame(self, index=True):
852
919
from pandas import DataFrame
853
920
result = DataFrame ({(name or level ): self .get_level_values (level )
854
921
for name , level in
855
- zip (self .names , range (len (self .levels )))})
922
+ zip (self .names , range (len (self .levels )))},
923
+ copy = False )
856
924
if index :
857
925
result .index = self
858
926
return result
@@ -1478,29 +1546,41 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
1478
1546
method = missing .clean_reindex_fill_method (method )
1479
1547
target = _ensure_index (target )
1480
1548
1481
- target_index = target
1482
- if isinstance (target , MultiIndex ):
1483
- target_index = target . _tuple_index
1549
+ # empty indexer
1550
+ if is_list_like (target ) and not len ( target ):
1551
+ return _ensure_platform_int ( np . array ([]))
1484
1552
1485
- if not is_object_dtype (target_index .dtype ):
1486
- return np .ones (len (target_index )) * - 1
1553
+ if not isinstance (target , MultiIndex ):
1554
+ try :
1555
+ target = MultiIndex .from_tuples (target )
1556
+ except (TypeError , ValueError ):
1557
+
1558
+ # let's instead try with a straight Index
1559
+ if method is None :
1560
+ return Index (self .values ).get_indexer (target ,
1561
+ method = method ,
1562
+ limit = limit ,
1563
+ tolerance = tolerance )
1487
1564
1488
1565
if not self .is_unique :
1489
1566
raise Exception ('Reindexing only valid with uniquely valued Index '
1490
1567
'objects' )
1491
1568
1492
- self_index = self ._tuple_index
1493
-
1494
1569
if method == 'pad' or method == 'backfill' :
1495
1570
if tolerance is not None :
1496
1571
raise NotImplementedError ("tolerance not implemented yet "
1497
1572
'for MultiIndex' )
1498
- indexer = self_index ._get_fill_indexer (target , method , limit )
1573
+ indexer = self ._get_fill_indexer (target , method , limit )
1499
1574
elif method == 'nearest' :
1500
1575
raise NotImplementedError ("method='nearest' not implemented yet "
1501
1576
'for MultiIndex; see GitHub issue 9365' )
1502
1577
else :
1503
- indexer = self_index ._engine .get_indexer (target ._values )
1578
+ # we may not compare equally because of hashing if we
1579
+ # don't have the same dtypes
1580
+ if self ._inferred_type_levels != target ._inferred_type_levels :
1581
+ return Index (self .values ).get_indexer (target .values )
1582
+
1583
+ indexer = self ._engine .get_indexer (target )
1504
1584
1505
1585
return _ensure_platform_int (indexer )
1506
1586
@@ -1567,17 +1647,6 @@ def reindex(self, target, method=None, level=None, limit=None,
1567
1647
1568
1648
return target , indexer
1569
1649
1570
- @cache_readonly
1571
- def _tuple_index (self ):
1572
- """
1573
- Convert MultiIndex to an Index of tuples
1574
-
1575
- Returns
1576
- -------
1577
- index : Index
1578
- """
1579
- return Index (self ._values )
1580
-
1581
1650
def get_slice_bound (self , label , side , kind ):
1582
1651
1583
1652
if not isinstance (label , tuple ):
@@ -1824,8 +1893,16 @@ def partial_selection(key, indexer=None):
1824
1893
1825
1894
key = tuple (self [indexer ].tolist ()[0 ])
1826
1895
1827
- return (self ._engine .get_loc (_values_from_object (key )),
1828
- None )
1896
+ try :
1897
+ return (self ._engine .get_loc (
1898
+ _values_from_object (key )), None )
1899
+ except ValueError :
1900
# if we have a very odd MultiIndex,
1901
+ # e.g. with embedded tuples, this might fail
1902
+ # TODO: should prob not allow construction of a MI
1903
+ # like this in the first place
1904
+ return Index (self .values ).get_loc (key )
1905
+
1829
1906
else :
1830
1907
return partial_selection (key )
1831
1908
else :
@@ -2098,7 +2175,9 @@ def equals(self, other):
2098
2175
return True
2099
2176
2100
2177
if not isinstance (other , Index ):
2101
- return False
2178
+ if not isinstance (other , tuple ):
2179
+ return False
2180
+ other = Index ([other ])
2102
2181
2103
2182
if not isinstance (other , MultiIndex ):
2104
2183
return array_equivalent (self ._values ,
@@ -2111,10 +2190,24 @@ def equals(self, other):
2111
2190
return False
2112
2191
2113
2192
for i in range (self .nlevels ):
2193
+ slabels = self .labels [i ]
2194
+ slabels = slabels [slabels != - 1 ]
2114
2195
svalues = algos .take_nd (np .asarray (self .levels [i ]._values ),
2115
- self .labels [i ], allow_fill = False )
2196
+ slabels , allow_fill = False )
2197
+
2198
+ olabels = other .labels [i ]
2199
+ olabels = olabels [olabels != - 1 ]
2116
2200
ovalues = algos .take_nd (np .asarray (other .levels [i ]._values ),
2117
- other .labels [i ], allow_fill = False )
2201
+ olabels , allow_fill = False )
2202
+
2203
+ # since we use NaT both datetime64 and timedelta64
2204
+ # we can have a situation where a level is typed say
2205
+ # timedelta64 in self (IOW it has other values than NaT)
2206
+ # but types datetime64 in other (where its all NaT)
2207
+ # but these are equivalent
2208
+ if len (svalues ) == 0 and len (ovalues ) == 0 :
2209
+ continue
2210
+
2118
2211
if not array_equivalent (svalues , ovalues ):
2119
2212
return False
2120
2213
0 commit comments