|
14 | 14 | from pandas.compat.numpy import function as nv
|
15 | 15 | from pandas import compat
|
16 | 16 |
|
17 |
| - |
18 | 17 | from pandas.types.common import (_ensure_int64,
|
19 | 18 | _ensure_platform_int,
|
20 | 19 | is_object_dtype,
|
@@ -73,6 +72,7 @@ class MultiIndex(Index):
|
73 | 72 | _levels = FrozenList()
|
74 | 73 | _labels = FrozenList()
|
75 | 74 | _comparables = ['names']
|
| 75 | + _engine_type = _index.MultiIndexEngine |
76 | 76 | rename = Index.set_names
|
77 | 77 |
|
78 | 78 | def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
|
@@ -114,7 +114,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
|
114 | 114 | result._verify_integrity()
|
115 | 115 | if _set_identity:
|
116 | 116 | result._reset_identity()
|
117 |
| - |
118 | 117 | return result
|
119 | 118 |
|
120 | 119 | def _verify_integrity(self, labels=None, levels=None):
|
@@ -619,6 +618,10 @@ def _get_level_number(self, level):
|
619 | 618 |
|
620 | 619 | _tuples = None
|
621 | 620 |
|
@cache_readonly
def _engine(self):
    # Lazily build and cache the indexing engine for this MultiIndex
    # (``self._engine_type`` is ``_index.MultiIndexEngine``, set on the
    # class).  The engine is handed a zero-argument callable returning
    # the index rather than the index itself — presumably to avoid a
    # reference cycle between index and engine; TODO confirm — plus the
    # number of entries.
    return self._engine_type(lambda: self, len(self))
| 624 | + |
622 | 625 | @property
|
623 | 626 | def values(self):
|
624 | 627 | if self._tuples is not None:
|
@@ -655,10 +658,59 @@ def _has_complex_internals(self):
|
655 | 658 | # to disable groupby tricks
|
656 | 659 | return True
|
657 | 660 |
|
@cache_readonly
def is_monotonic(self):
    """
    Return True if the index is lexicographically non-decreasing when
    its entries are read as tuples.
    """
    # TODO
    # it is unfortunate that we end up tupleizing
    # just to determine monotonicity :<

    # fast-path: if the index is monotonic, its *realized* first-level
    # values (level values taken by label) must be non-decreasing.
    # NOTE: checking ``self.levels[0].is_monotonic`` here would be
    # unsound — the unique level values are not required to be stored
    # in sorted order (e.g. levels=[['b', 'a']], labels=[[1, 0]] is a
    # monotonic index whose level is not monotonic).
    if not self.get_level_values(0).is_monotonic:
        return False

    # slow path: materialize the tuples and defer to Index
    return Index(self.values).is_monotonic
| 673 | + |
@cache_readonly
def is_unique(self):
    """Return True if the index contains no duplicate entries."""
    has_duplicates = self.duplicated().any()
    return not has_duplicates
|
661 | 677 |
|
@cache_readonly
def _hashed_values(self):
    """Return a uint64 ndarray of my hashed values."""
    # local import to avoid a circular import at module load time
    from pandas.tools.hashing import hash_tuples
    hashed = hash_tuples(self)
    return hashed
| 683 | + |
@cache_readonly
def _have_mixed_levels(self):
    """Return a boolean list, one entry per level, indicating whether
    that level's inferred type contains 'mixed'."""
    return ['mixed' in typ for typ in self._inferred_type_levels]
| 688 | + |
@cache_readonly
def _inferred_type_levels(self):
    """Return a list of the inferred types, one for each level."""
    return [level.inferred_type for level in self.levels]
| 693 | + |
| 694 | + def _as_valid_indexing_key(self, key): |
| 695 | + """ |
| 696 | + validate and return our key |
| 697 | + we need to stringify if we have mixed levels |
| 698 | +
|
| 699 | + this is internal for use for the cython routines |
| 700 | + """ |
| 701 | + if not isinstance(key, tuple): |
| 702 | + return key |
| 703 | + |
| 704 | + if not len(key) == self.nlevels: |
| 705 | + raise KeyError |
| 706 | + |
| 707 | + def f(k, stringify): |
| 708 | + if stringify and not isinstance(k, compat.string_types): |
| 709 | + k = str(k) |
| 710 | + return k |
| 711 | + return tuple([f(k, stringify) |
| 712 | + for k, stringify in zip(key, self._have_mixed_levels)]) |
| 713 | + |
662 | 714 | @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
|
663 | 715 | False: 'first'})
|
664 | 716 | @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs)
|
@@ -852,7 +904,8 @@ def to_frame(self, index=True):
|
852 | 904 | from pandas import DataFrame
|
853 | 905 | result = DataFrame({(name or level): self.get_level_values(level)
|
854 | 906 | for name, level in
|
855 |
| - zip(self.names, range(len(self.levels)))}) |
| 907 | + zip(self.names, range(len(self.levels)))}, |
| 908 | + copy=False) |
856 | 909 | if index:
|
857 | 910 | result.index = self
|
858 | 911 | return result
|
@@ -1478,29 +1531,41 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
|
1478 | 1531 | method = missing.clean_reindex_fill_method(method)
|
1479 | 1532 | target = _ensure_index(target)
|
1480 | 1533 |
|
1481 |
| - target_index = target |
1482 |
| - if isinstance(target, MultiIndex): |
1483 |
| - target_index = target._tuple_index |
| 1534 | + # empty indexer |
| 1535 | + if is_list_like(target) and not len(target): |
| 1536 | + return _ensure_platform_int(np.array([])) |
| 1537 | + |
| 1538 | + if not isinstance(target, MultiIndex): |
| 1539 | + try: |
| 1540 | + target = MultiIndex.from_tuples(target) |
| 1541 | + except (TypeError, ValueError): |
1484 | 1542 |
|
1485 |
| - if not is_object_dtype(target_index.dtype): |
1486 |
| - return np.ones(len(target_index)) * -1 |
| 1543 | + # let's instead try with a straight Index |
| 1544 | + if method is None: |
| 1545 | + return Index(self.values).get_indexer(target, |
| 1546 | + method=method, |
| 1547 | + limit=limit, |
| 1548 | + tolerance=tolerance) |
1487 | 1549 |
|
1488 | 1550 | if not self.is_unique:
|
1489 | 1551 | raise Exception('Reindexing only valid with uniquely valued Index '
|
1490 | 1552 | 'objects')
|
1491 | 1553 |
|
1492 |
| - self_index = self._tuple_index |
1493 |
| - |
1494 | 1554 | if method == 'pad' or method == 'backfill':
|
1495 | 1555 | if tolerance is not None:
|
1496 | 1556 | raise NotImplementedError("tolerance not implemented yet "
|
1497 | 1557 | 'for MultiIndex')
|
1498 |
| - indexer = self_index._get_fill_indexer(target, method, limit) |
| 1558 | + indexer = self._get_fill_indexer(target, method, limit) |
1499 | 1559 | elif method == 'nearest':
|
1500 | 1560 | raise NotImplementedError("method='nearest' not implemented yet "
|
1501 | 1561 | 'for MultiIndex; see GitHub issue 9365')
|
1502 | 1562 | else:
|
1503 |
| - indexer = self_index._engine.get_indexer(target._values) |
| 1563 | + # we may not compare equally because of hashing if we |
| 1564 | + # don't have the same dtypes |
| 1565 | + if self._inferred_type_levels != target._inferred_type_levels: |
| 1566 | + return Index(self.values).get_indexer(target.values) |
| 1567 | + |
| 1568 | + indexer = self._engine.get_indexer(target) |
1504 | 1569 |
|
1505 | 1570 | return _ensure_platform_int(indexer)
|
1506 | 1571 |
|
@@ -1567,17 +1632,6 @@ def reindex(self, target, method=None, level=None, limit=None,
|
1567 | 1632 |
|
1568 | 1633 | return target, indexer
|
1569 | 1634 |
|
1570 |
| - @cache_readonly |
1571 |
| - def _tuple_index(self): |
1572 |
| - """ |
1573 |
| - Convert MultiIndex to an Index of tuples |
1574 |
| -
|
1575 |
| - Returns |
1576 |
| - ------- |
1577 |
| - index : Index |
1578 |
| - """ |
1579 |
| - return Index(self._values) |
1580 |
| - |
1581 | 1635 | def get_slice_bound(self, label, side, kind):
|
1582 | 1636 |
|
1583 | 1637 | if not isinstance(label, tuple):
|
@@ -1824,8 +1878,16 @@ def partial_selection(key, indexer=None):
|
1824 | 1878 |
|
1825 | 1879 | key = tuple(self[indexer].tolist()[0])
|
1826 | 1880 |
|
1827 |
| - return (self._engine.get_loc(_values_from_object(key)), |
1828 |
| - None) |
| 1881 | + try: |
| 1882 | + return (self._engine.get_loc( |
| 1883 | + _values_from_object(key)), None) |
| 1884 | + except ValueError: |
| 1885 | + # if we hae a very odd MultiIndex, |
| 1886 | + # e.g. with embedded tuples, this might fail |
| 1887 | + # TODO: should prob not allow construction of a MI |
| 1888 | + # like this in the first place |
| 1889 | + return Index(self.values).get_loc(key) |
| 1890 | + |
1829 | 1891 | else:
|
1830 | 1892 | return partial_selection(key)
|
1831 | 1893 | else:
|
|
0 commit comments