21
21
from pandas .core .common import (_values_from_object , is_float , is_integer ,
22
22
ABCSeries , _ensure_object , _ensure_int64 )
23
23
from pandas .core .config import get_option
24
+ from pandas .io .common import PerformanceWarning
24
25
25
26
# simplify
26
27
default_pprint = lambda x : com .pprint_thing (x , escape_chars = ('\t ' , '\r ' , '\n ' ),
@@ -4027,30 +4028,83 @@ def _partial_tup_index(self, tup, side='left'):
4027
4028
4028
4029
def get_loc(self, key):
    """
    Get integer location, slice or boolean mask for requested label or tuple

    If the key is past the lexsort depth, the return may be a boolean mask
    array, otherwise it is always a slice or int.

    Parameters
    ----------
    key : label or tuple

    Returns
    -------
    loc : int, slice object or boolean mask

    Raises
    ------
    KeyError
        If a tuple key has more elements than the index has levels, or if
        no entry of the index matches the key.
    """
    def _maybe_to_slice(loc):
        '''convert integer indexer to boolean mask or slice if possible'''
        # anything that is not an int64 array (int, slice, ...) is passed
        # through untouched
        if not isinstance(loc, np.ndarray) or loc.dtype != 'int64':
            return loc

        loc = lib.maybe_indices_to_slice(loc)
        if isinstance(loc, slice):
            return loc

        # the positions could not be expressed as a slice: fall back to a
        # boolean mask spanning the whole index
        mask = np.zeros(len(self), dtype='bool')
        mask[loc] = True
        return mask

    if not isinstance(key, tuple):
        # a scalar label is looked up on the first level only
        loc = self._get_level_indexer(key, level=0)
        return _maybe_to_slice(loc)

    keylen = len(key)
    if self.nlevels < keylen:
        raise KeyError('Key length ({0}) exceeds index depth ({1})'
                       ''.format(keylen, self.nlevels))

    if keylen == self.nlevels and self.is_unique:
        def _maybe_str_to_time_stamp(label, lev):
            # best effort: on an all-dates level try to turn the label
            # into a Timestamp; if the conversion fails the label is
            # deliberately used unchanged
            if lev.is_all_dates and not isinstance(label, Timestamp):
                try:
                    return Timestamp(label, tz=getattr(lev, 'tz', None))
                except Exception:
                    pass
            return label

        key = _values_from_object(key)
        key = tuple(map(_maybe_str_to_time_stamp, key, self.levels))
        return self._engine.get_loc(key)

    # -- partial selection or non-unique index
    # break the key into 2 parts based on the lexsort_depth of the index;
    # the first part returns a continuous slice of the index; the 2nd part
    # needs linear search within the slice
    depth = self.lexsort_depth
    lead_key, follow_key = key[:depth], key[depth:]
    if lead_key:
        start, stop = self.slice_locs(lead_key, lead_key)
    else:
        start, stop = 0, len(self)

    if start == stop:
        raise KeyError(key)

    if not follow_key:
        return slice(start, stop)

    warnings.warn('indexing past lexsort depth may impact performance.',
                  PerformanceWarning)

    loc = np.arange(start, stop, dtype='int64')

    # narrow the candidate positions one level at a time; the level number
    # continues from where the lead part of the key stopped
    for level, label in enumerate(follow_key, len(lead_key)):
        mask = self.labels[level][loc] == self.levels[level].get_loc(label)
        if not mask.all():
            loc = loc[mask]
        if not len(loc):
            raise KeyError(key)

    # nothing was filtered out: the original contiguous slice still holds
    if len(loc) == stop - start:
        return slice(start, stop)
    return _maybe_to_slice(loc)
4054
4108
4055
4109
def get_loc_level (self , key , level = 0 , drop_level = True ):
4056
4110
"""
@@ -4115,10 +4169,10 @@ def _maybe_drop_levels(indexer, levels, drop_level):
4115
4169
if not any (isinstance (k , slice ) for k in key ):
4116
4170
4117
4171
# partial selection
4118
- def partial_selection ( key ):
4119
- indexer = slice ( * self . slice_locs ( key , key ))
4120
- if indexer . start == indexer . stop :
4121
- raise KeyError (key )
4172
+ # optionally get indexer to avoid re-calculation
4173
+ def partial_selection ( key , indexer = None ):
4174
+ if indexer is None :
4175
+ indexer = self . get_loc (key )
4122
4176
ilevels = [i for i in range (len (key ))
4123
4177
if key [i ] != slice (None , None )]
4124
4178
return indexer , _maybe_drop_levels (indexer , ilevels ,
@@ -4139,11 +4193,12 @@ def partial_selection(key):
4139
4193
if any ([
4140
4194
l .is_all_dates for k , l in zip (key , self .levels )
4141
4195
]) and not can_index_exactly :
4142
- indexer = slice ( * self .slice_locs (key , key ) )
4196
+ indexer = self .get_loc (key )
4143
4197
4144
4198
# we have a multiple selection here
4145
- if not indexer .stop - indexer .start == 1 :
4146
- return partial_selection (key )
4199
+ if not isinstance (indexer , slice ) \
4200
+ or indexer .stop - indexer .start != 1 :
4201
+ return partial_selection (key , indexer )
4147
4202
4148
4203
key = tuple (self [indexer ].tolist ()[0 ])
4149
4204
0 commit comments