@@ -105,6 +105,7 @@ class Index(IndexOpsMixin, PandasObject):
105
105
_is_numeric_dtype = False
106
106
107
107
_engine_type = _index .ObjectEngine
108
+ _isin_type = lib .ismember
108
109
109
110
def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False ,
110
111
tupleize_cols = True , ** kwargs ):
@@ -1838,7 +1839,7 @@ def isin(self, values, level=None):
1838
1839
value_set = set (values )
1839
1840
if level is not None :
1840
1841
self ._validate_index_level (level )
1841
- return lib . ismember (np .array (self ), value_set )
1842
+ return self . _isin_type (np .array (self ), value_set )
1842
1843
1843
1844
def _can_reindex (self , indexer ):
1844
1845
"""
@@ -3381,6 +3382,7 @@ class Int64Index(NumericIndex):
3381
3382
_outer_indexer = _algos .outer_join_indexer_int64
3382
3383
3383
3384
_engine_type = _index .Int64Engine
3385
+ _isin_type = lib .ismember_int64
3384
3386
3385
3387
def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False , ** kwargs ):
3386
3388
@@ -5237,13 +5239,39 @@ def partial_selection(key, indexer=None):
5237
5239
indexer = self ._get_level_indexer (key , level = level )
5238
5240
return indexer , maybe_droplevels (indexer , [level ], drop_level )
5239
5241
5240
- def _get_level_indexer (self , key , level = 0 ):
5241
- # return a boolean indexer or a slice showing where the key is
5242
+ def _get_level_indexer (self , key , level = 0 , indexer = None ):
5243
+ # return an indexer, boolean array or a slice showing where the key is
5242
5244
# in the totality of values
5245
+ # if the indexer is provided, then use this
5243
5246
5244
5247
level_index = self .levels [level ]
5245
5248
labels = self .labels [level ]
5246
5249
5250
def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
    """
    Find where ``labels`` holds a code from ``np.arange(start, stop, step)``.

    Parameters
    ----------
    start, stop, step
        Forwarded unchanged to ``np.arange`` to build the set of wanted
        codes.
    indexer : optional
        Defaults to the ``indexer`` of the enclosing function.  It is
        passed to ``labels.take``, so it holds positions into ``labels``
        (presumably the rows selected so far -- confirm against caller).
    labels : array-like
        Defaults to the ``labels`` of the enclosing function.

    Returns
    -------
    ndarray
        If ``indexer`` is given and its length differs from
        ``len(labels)``: the entries of ``indexer`` whose label is one of
        the wanted codes.  Otherwise: a boolean mask of ``len(labels)``
        that is True wherever the label is one of the wanted codes.
    """
    r = np.arange(start, stop, step)

    if indexer is not None and len(indexer) != len(labels):
        # Only a subset of the rows is still in play, so test just those
        # rows instead of the whole labels array.  ``isin(...).nonzero()``
        # yields offsets *into the subset*; mapping them through
        # ``Series(indexer)`` translates each offset back into the
        # location stored in ``indexer`` at that offset.
        from pandas import Series
        mapper = Series(indexer)
        result = Series(Index(labels.take(indexer)).isin(r).nonzero()[0])
        m = result.map(mapper).values

    else:
        m = np.zeros(len(labels), dtype=bool)
        # Do NOT pass assume_unique=True here: level codes repeat, and
        # in1d documents that flag as a promise that *both* inputs are
        # unique.  Breaking the promise can mark non-members as members.
        m[np.in1d(labels, r)] = True

    return m
5274
+
5247
5275
if isinstance (key , slice ):
5248
5276
# handle a slice, returning a slice if we can
5249
5277
# otherwise a boolean indexer
@@ -5269,17 +5297,13 @@ def _get_level_indexer(self, key, level=0):
5269
5297
# a partial date slicer on a DatetimeIndex generates a slice
5270
5298
# note that the stop ALREADY includes the stopped point (if
5271
5299
# it was a string sliced)
5272
- m = np .zeros (len (labels ),dtype = bool )
5273
- m [np .in1d (labels ,np .arange (start .start ,stop .stop ,step ))] = True
5274
- return m
5300
+ return convert_indexer (start .start ,stop .stop ,step )
5275
5301
5276
5302
elif level > 0 or self .lexsort_depth == 0 or step is not None :
5277
5303
# need to have like semantics here to right
5278
5304
# searching as when we are using a slice
5279
5305
# so include the stop+1 (so we include stop)
5280
- m = np .zeros (len (labels ),dtype = bool )
5281
- m [np .in1d (labels ,np .arange (start ,stop + 1 ,step ))] = True
5282
- return m
5306
+ return convert_indexer (start ,stop + 1 ,step )
5283
5307
else :
5284
5308
# sorted, so can return slice object -> view
5285
5309
i = labels .searchsorted (start , side = 'left' )
@@ -5317,59 +5341,73 @@ def get_locs(self, tup):
5317
5341
raise KeyError ('MultiIndex Slicing requires the index to be fully lexsorted'
5318
5342
' tuple len ({0}), lexsort depth ({1})' .format (len (tup ), self .lexsort_depth ))
5319
5343
5320
- def _convert_indexer (r ):
5344
+ # indexer
5345
+ # this is the list of all values that we want to select
5346
+ n = len (self )
5347
+ indexer = None
5348
+
5349
def _convert_to_indexer(r):
    """
    Wrap ``r`` in an ``Int64Index``.

    A slice is first expanded to the positions it selects out of ``n``
    (``n`` comes from the enclosing scope).  A boolean indexer must have
    length ``n`` and is reduced to the positions of its True entries.
    Anything else is handed to ``Int64Index`` unchanged.

    Raises
    ------
    ValueError
        If ``r`` is a boolean indexer whose length is not ``n``.
    """
    if isinstance(r, slice):
        # paint the slice onto a length-n mask and read the positions back
        selected = np.zeros(n, dtype=bool)
        selected[r] = True
        return Int64Index(selected.nonzero()[0])

    if is_bool_indexer(r):
        if len(r) != n:
            raise ValueError("cannot index with a boolean indexer that is"
                             " not the same length as the index")
        return Int64Index(r.nonzero()[0])

    return Int64Index(r)
5361
+
5362
def _update_indexer(idxr, indexer=indexer):
    """
    Combine a new selection ``idxr`` with the running ``indexer``.

    When ``indexer`` is None it is replaced by ``Index(np.arange(n))``
    (``n`` comes from the enclosing scope).  When ``idxr`` is None the
    running indexer is returned as is; otherwise the two are combined
    with ``&`` (presumably Index intersection -- confirm).
    """
    current = Index(np.arange(n)) if indexer is None else indexer
    return current if idxr is None else current & idxr
5326
5368
5327
- ranges = []
5328
5369
for i ,k in enumerate (tup ):
5329
5370
5330
5371
if is_bool_indexer (k ):
5331
5372
# a boolean indexer, must be the same length!
5332
5373
k = np .asarray (k )
5333
- if len (k ) != len (self ):
5334
- raise ValueError ("cannot index with a boolean indexer that is"
5335
- " not the same length as the index" )
5336
- ranges .append (k )
5374
+ indexer = _update_indexer (_convert_to_indexer (k ), indexer = indexer )
5375
+
5337
5376
elif is_list_like (k ):
5338
5377
# a collection of labels to include from this level (these are or'd)
5339
- indexers = []
5378
+ indexers = None
5340
5379
for x in k :
5341
5380
try :
5342
- indexers .append (_convert_indexer (self ._get_level_indexer (x , level = i )))
5381
+ idxrs = _convert_to_indexer (self ._get_level_indexer (x , level = i , indexer = indexer ))
5382
+ indexers = idxrs if indexers is None else indexers | idxrs
5343
5383
except (KeyError ):
5344
5384
5345
5385
# ignore not founds
5346
5386
continue
5347
- if len (k ):
5348
- ranges .append (reduce (np .logical_or , indexers ))
5387
+
5388
+ if indexers is not None :
5389
+ indexer = _update_indexer (indexers , indexer = indexer )
5349
5390
else :
5350
- ranges .append (np .zeros (self .labels [i ].shape , dtype = bool ))
5391
+
5392
+ # no matches we are done
5393
+ return Int64Index ([]).values
5351
5394
5352
5395
elif is_null_slice (k ):
5353
5396
# empty slice
5354
- pass
5397
+ indexer = _update_indexer ( None , indexer = indexer )
5355
5398
5356
5399
elif isinstance (k ,slice ):
5357
5400
5358
5401
# a slice, include BOTH of the labels
5359
- ranges . append ( self ._get_level_indexer (k ,level = i ) )
5402
+ indexer = _update_indexer ( _convert_to_indexer ( self ._get_level_indexer (k ,level = i , indexer = indexer )), indexer = indexer )
5360
5403
else :
5361
5404
# a single label
5362
- ranges .append (self .get_loc_level (k ,level = i ,drop_level = False )[0 ])
5363
-
5364
- # identity
5365
- if len (ranges ) == 0 :
5366
- return slice (0 ,len (self ))
5367
-
5368
- elif len (ranges ) == 1 :
5369
- return ranges [0 ]
5405
+ indexer = _update_indexer (_convert_to_indexer (self .get_loc_level (k ,level = i ,drop_level = False )[0 ]), indexer = indexer )
5370
5406
5371
- # construct a boolean indexer if we have a slice or boolean indexer
5372
- return reduce (np .logical_and ,[ _convert_indexer (r ) for r in ranges ])
5407
+ # empty indexer
5408
+ if indexer is None :
5409
+ return Int64Index ([]).values
5410
+ return indexer .values
5373
5411
5374
5412
def truncate (self , before = None , after = None ):
5375
5413
"""
0 commit comments