@@ -44,11 +44,8 @@ from pandas._libs.missing cimport (
44
44
45
45
from decimal import InvalidOperation
46
46
47
- # Defines shift of MultiIndex codes to avoid negative codes (missing values)
48
- multiindex_nulls_shift = 2
49
47
50
-
51
- cdef bint is_definitely_invalid_key(object val):
48
+ def is_definitely_invalid_key (val ):
52
49
try :
53
50
hash (val)
54
51
except TypeError :
@@ -671,172 +668,6 @@ cdef class PeriodEngine(Int64Engine):
671
668
return algos.is_monotonic(values, timelike = True )
672
669
673
670
674
- cdef class BaseMultiIndexCodesEngine:
675
- """
676
- Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which
677
- represent each label in a MultiIndex as an integer, by juxtaposing the bits
678
- encoding each level, with appropriate offsets.
679
-
680
- For instance: if 3 levels have respectively 3, 6 and 1 possible values,
681
- then their labels can be represented using respectively 2, 3 and 1 bits,
682
- as follows:
683
- _ _ _ _____ _ __ __ __
684
- |0|0|0| ... |0| 0|a1|a0| -> offset 0 (first level)
685
- — — — ————— — —— —— ——
686
- |0|0|0| ... |0|b2|b1|b0| -> offset 2 (bits required for first level)
687
- — — — ————— — —— —— ——
688
- |0|0|0| ... |0| 0| 0|c0| -> offset 5 (bits required for first two levels)
689
- ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾
690
- and the resulting unsigned integer representation will be:
691
- _ _ _ _____ _ __ __ __ __ __ __
692
- |0|0|0| ... |0|c0|b2|b1|b0|a1|a0|
693
- ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾
694
-
695
- Offsets are calculated at initialization, labels are transformed by method
696
- _codes_to_ints.
697
-
698
- Keys are located by first locating each component against the respective
699
- level, then locating (the integer representation of) codes.
700
- """
701
- def __init__ (self , object levels , object labels , ndarray offsets ):
702
- """
703
- Parameters
704
- ----------
705
- levels : list-like of numpy arrays
706
- Levels of the MultiIndex.
707
- labels : list-like of numpy arrays of integer dtype
708
- Labels of the MultiIndex.
709
- offsets : numpy array of int dtype
710
- Pre-calculated offsets, one for each level of the index.
711
- """
712
- self .levels = levels
713
- self .offsets = offsets
714
-
715
- # Transform labels into a single array, and add 2 so that we are working
716
- # with positive integers (-1 for NaN becomes 1). This enables us to
717
- # differentiate between values that are missing in other and matching
718
- # NaNs. We will set values that are not found to 0 later:
719
- codes = np.array(labels).T
720
- codes += multiindex_nulls_shift # inplace sum optimisation
721
-
722
- self .level_has_nans = [- 1 in lab for lab in labels]
723
-
724
- # Map each codes combination in the index to an integer unambiguously
725
- # (no collisions possible), based on the "offsets", which describe the
726
- # number of bits to shift labels by for each level:
727
- lab_ints = self ._codes_to_ints(codes)
728
-
729
- # Initialize underlying index (e.g. libindex.UInt64Engine) with
730
- # integers representing labels: we will use its get_loc and get_indexer
731
- self ._base.__init__ (self , lab_ints)
732
-
733
- def _codes_to_ints (self , ndarray codes ) -> np.ndarray:
734
- """
735
- Transform combination(s) of uint into one uint or Python integer (each), in a
736
- strictly monotonic way (i.e. respecting the lexicographic order of integer
737
- combinations).
738
-
739
- Parameters
740
- ----------
741
- codes : 1- or 2-dimensional array of dtype uint
742
- Combinations of integers (one per row )
743
-
744
- Returns
745
- -------
746
- scalar or 1-dimensional array , of dtype _codes_dtype
747
- Integer(s ) representing one combination (each ).
748
- """
749
- # To avoid overflows , first make sure we are working with the right dtype:
750
- codes = codes.astype(self ._codes_dtype, copy = False )
751
-
752
- # Shift the representation of each level by the pre-calculated number of bits:
753
- codes <<= self.offsets # inplace shift optimisation
754
-
755
- # Now sum and OR are in fact interchangeable. This is a simple
756
- # composition of the (disjoint) significant bits of each level (i.e.
757
- # each column in "codes") in a single positive integer (per row ):
758
- if codes.ndim == 1 :
759
- # Single key
760
- return np.bitwise_or.reduce(codes)
761
-
762
- # Multiple keys
763
- return np.bitwise_or.reduce(codes, axis = 1 )
764
-
765
- def _extract_level_codes (self , target ) -> np.ndarray:
766
- """
767
- Map the requested list of (tuple ) keys to their integer representations
768
- for searching in the underlying integer index.
769
-
770
- Parameters
771
- ----------
772
- target : MultiIndex
773
-
774
- Returns
775
- -------
776
- int_keys : 1-dimensional array of dtype uint64 or object
777
- Integers representing one combination each
778
- """
779
- level_codes = list (target._recode_for_new_levels(self .levels))
780
- for i , codes in enumerate(level_codes ):
781
- if self .levels[i].hasnans:
782
- na_index = self .levels[i].isna().nonzero()[0 ][0 ]
783
- codes[target.codes[i] == - 1 ] = na_index
784
- codes += 1
785
- codes[codes > 0 ] += 1
786
- if self .level_has_nans[i]:
787
- codes[target.codes[i] == - 1 ] += 1
788
- return self ._codes_to_ints(np.array(level_codes, dtype = self ._codes_dtype).T)
789
-
790
- def get_indexer (self , target: np.ndarray ) -> np.ndarray:
791
- """
792
- Returns an array giving the positions of each value of `target` in
793
- `self.values`, where -1 represents a value in `target` which does not
794
- appear in `self.values`
795
-
796
- Parameters
797
- ----------
798
- target : np.ndarray
799
-
800
- Returns
801
- -------
802
- np.ndarray[intp_t , ndim = 1 ] of the indexer of `target` into
803
- `self.values`
804
- """
805
- return self._base.get_indexer(self , target )
806
-
807
- def get_loc(self , object key ):
808
- if is_definitely_invalid_key(key):
809
- raise TypeError (f" '{key}' is an invalid key" )
810
- if not isinstance (key, tuple ):
811
- raise KeyError (key)
812
- try :
813
- indices = [1 if checknull(v) else lev.get_loc(v) + multiindex_nulls_shift
814
- for lev, v in zip (self .levels, key)]
815
- except KeyError :
816
- raise KeyError (key)
817
-
818
- # Transform indices into single integer:
819
- lab_int = self ._codes_to_ints(np.array(indices, dtype = self ._codes_dtype))
820
-
821
- return self ._base.get_loc(self , lab_int)
822
-
823
- def get_indexer_non_unique (self , target: np.ndarray ) -> np.ndarray:
824
- indexer = self ._base.get_indexer_non_unique(self , target)
825
-
826
- return indexer
827
-
828
- def __contains__(self , val: object ) -> bool:
829
- # We assume before we get here:
830
- # - val is hashable
831
- # Default __contains__ looks in the underlying mapping , which in this
832
- # case only contains integer representations.
833
- try:
834
- self.get_loc(val )
835
- return True
836
- except (KeyError , TypeError , ValueError ):
837
- return False
838
-
839
-
840
671
# Generated from template.
841
672
include " index_class_helper.pxi"
842
673
0 commit comments