@@ -105,6 +105,7 @@ class Index(IndexOpsMixin, PandasObject):
105
105
_is_numeric_dtype = False
106
106
107
107
_engine_type = _index .ObjectEngine
108
+ _isin_type = lib .ismember
108
109
109
110
def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False ,
110
111
tupleize_cols = True , ** kwargs ):
@@ -1838,7 +1839,7 @@ def isin(self, values, level=None):
1838
1839
value_set = set (values )
1839
1840
if level is not None :
1840
1841
self ._validate_index_level (level )
1841
- return lib . ismember (np .array (self ), value_set )
1842
+ return self . _isin_type (np .array (self ), value_set )
1842
1843
1843
1844
def _can_reindex (self , indexer ):
1844
1845
"""
@@ -3379,6 +3380,7 @@ class Int64Index(NumericIndex):
3379
3380
_outer_indexer = _algos .outer_join_indexer_int64
3380
3381
3381
3382
_engine_type = _index .Int64Engine
3383
+ _isin_type = lib .ismember_int64
3382
3384
3383
3385
def __new__ (cls , data = None , dtype = None , copy = False , name = None , fastpath = False , ** kwargs ):
3384
3386
@@ -5235,13 +5237,39 @@ def partial_selection(key, indexer=None):
5235
5237
indexer = self ._get_level_indexer (key , level = level )
5236
5238
return indexer , maybe_droplevels (indexer , [level ], drop_level )
5237
5239
5238
- def _get_level_indexer (self , key , level = 0 ):
5239
- # return a boolean indexer or a slice showing where the key is
5240
+ def _get_level_indexer (self , key , level = 0 , indexer = None ):
5241
+ # return an indexer, boolean array or a slice showing where the key is
5240
5242
# in the totality of values
5243
+ # if the indexer is provided, then use this
5241
5244
5242
5245
level_index = self .levels [level ]
5243
5246
labels = self .labels [level ]
5244
5247
5248
def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
    """
    Find the locations whose label falls in ``np.arange(start, stop, step)``.

    Parameters
    ----------
    start, stop, step
        Forwarded unchanged to ``np.arange`` to build the set of wanted
        label values.
    indexer : array-like of positions or None
        Locations in ``labels`` that have already been selected. Bound
        at definition time to the enclosing function's ``indexer``.
    labels : array-like
        The labels of the level being searched. Bound at definition
        time to the enclosing function's ``labels``.

    Returns
    -------
    ndarray
        * When ``indexer`` is given and does not span all of ``labels``:
          the entries of ``indexer`` whose label is in the wanted range
          (i.e. positions, not a mask).
        * Otherwise: a boolean mask of ``len(labels)``.
    """
    r = np.arange(start, stop, step)

    if indexer is not None and len(indexer) != len(labels):
        # Only a subset of rows is still a candidate, so test just those
        # rows instead of scanning the whole labels array.
        # imported locally, as in the original code
        from pandas import Series

        # offset within ``indexer`` -> original location
        mapper = Series(indexer)
        # offsets (into ``indexer``) of the candidates that match
        result = Series(Index(labels.take(indexer)).isin(r).nonzero()[0])
        # translate the offsets back to locations in the full index
        m = result.map(mapper).values

    else:
        m = np.zeros(len(labels), dtype=bool)
        # ``labels`` holds one entry per row and therefore normally
        # contains repeats; claiming uniqueness unconditionally lets
        # numpy's sort-based membership test mark rows that are NOT in
        # ``r``. Only take the fast path when it is actually valid.
        m[np.in1d(labels, r, assume_unique=Index(labels).is_unique)] = True

    return m
5272
+
5245
5273
if isinstance (key , slice ):
5246
5274
# handle a slice, returning a slice if we can
5247
5275
# otherwise a boolean indexer
@@ -5267,17 +5295,13 @@ def _get_level_indexer(self, key, level=0):
5267
5295
# a partial date slicer on a DatetimeIndex generates a slice
5268
5296
# note that the stop ALREADY includes the stopped point (if
5269
5297
# it was a string sliced)
5270
- m = np .zeros (len (labels ),dtype = bool )
5271
- m [np .in1d (labels ,np .arange (start .start ,stop .stop ,step ))] = True
5272
- return m
5298
+ return convert_indexer (start .start ,stop .stop ,step )
5273
5299
5274
5300
elif level > 0 or self .lexsort_depth == 0 or step is not None :
5275
5301
# need to have like semantics here to right
5276
5302
# searching as when we are using a slice
5277
5303
# so include the stop+1 (so we include stop)
5278
- m = np .zeros (len (labels ),dtype = bool )
5279
- m [np .in1d (labels ,np .arange (start ,stop + 1 ,step ))] = True
5280
- return m
5304
+ return convert_indexer (start ,stop + 1 ,step )
5281
5305
else :
5282
5306
# sorted, so can return slice object -> view
5283
5307
i = labels .searchsorted (start , side = 'left' )
@@ -5315,59 +5339,73 @@ def get_locs(self, tup):
5315
5339
raise KeyError ('MultiIndex Slicing requires the index to be fully lexsorted'
5316
5340
' tuple len ({0}), lexsort depth ({1})' .format (len (tup ), self .lexsort_depth ))
5317
5341
5318
- def _convert_indexer (r ):
5342
+ # indexer
5343
+ # this is the list of all values that we want to select
5344
+ n = len (self )
5345
+ indexer = None
5346
+
5347
def _convert_to_indexer(r):
    """
    Normalise a slice, boolean mask or positional array to an Int64Index.

    ``n`` is taken from the enclosing scope. A boolean indexer whose
    length differs from ``n`` raises ValueError.
    """
    if isinstance(r, slice):
        # expand the slice through a mask so the result is the set of
        # selected positions in ascending order
        mask = np.zeros(n, dtype=bool)
        mask[r] = True
        return Int64Index(mask.nonzero()[0])

    if is_bool_indexer(r):
        if len(r) != n:
            raise ValueError("cannot index with a boolean indexer that is"
                             " not the same length as the index")
        return Int64Index(r.nonzero()[0])

    # anything else is wrapped as-is
    return Int64Index(r)
5359
+
5360
def _update_indexer(idxr, indexer=indexer):
    """
    Combine the running ``indexer`` with the new restriction ``idxr``.

    A missing ``indexer`` is replaced by ``Index(np.arange(n))`` (``n``
    comes from the enclosing scope). A missing ``idxr`` adds no
    restriction. Otherwise the two are combined with ``&``.
    """
    current = Index(np.arange(n)) if indexer is None else indexer
    return current if idxr is None else current & idxr
5324
5366
5325
- ranges = []
5326
5367
for i ,k in enumerate (tup ):
5327
5368
5328
5369
if is_bool_indexer (k ):
5329
5370
# a boolean indexer, must be the same length!
5330
5371
k = np .asarray (k )
5331
- if len (k ) != len (self ):
5332
- raise ValueError ("cannot index with a boolean indexer that is"
5333
- " not the same length as the index" )
5334
- ranges .append (k )
5372
+ indexer = _update_indexer (_convert_to_indexer (k ), indexer = indexer )
5373
+
5335
5374
elif is_list_like (k ):
5336
5375
# a collection of labels to include from this level (these are or'd)
5337
- indexers = []
5376
+ indexers = None
5338
5377
for x in k :
5339
5378
try :
5340
- indexers .append (_convert_indexer (self ._get_level_indexer (x , level = i )))
5379
+ idxrs = _convert_to_indexer (self ._get_level_indexer (x , level = i , indexer = indexer ))
5380
+ indexers = idxrs if indexers is None else indexers | idxrs
5341
5381
except (KeyError ):
5342
5382
5343
5383
# ignore not founds
5344
5384
continue
5345
- if len (k ):
5346
- ranges .append (reduce (np .logical_or , indexers ))
5385
+
5386
+ if indexers is not None :
5387
+ indexer = _update_indexer (indexers , indexer = indexer )
5347
5388
else :
5348
- ranges .append (np .zeros (self .labels [i ].shape , dtype = bool ))
5389
+
5390
+ # no matches we are done
5391
+ return Int64Index ([]).values
5349
5392
5350
5393
elif is_null_slice (k ):
5351
5394
# empty slice
5352
- pass
5395
+ indexer = _update_indexer ( None , indexer = indexer )
5353
5396
5354
5397
elif isinstance (k ,slice ):
5355
5398
5356
5399
# a slice, include BOTH of the labels
5357
- ranges . append ( self ._get_level_indexer (k ,level = i ) )
5400
+ indexer = _update_indexer ( _convert_to_indexer ( self ._get_level_indexer (k ,level = i , indexer = indexer )), indexer = indexer )
5358
5401
else :
5359
5402
# a single label
5360
- ranges .append (self .get_loc_level (k ,level = i ,drop_level = False )[0 ])
5361
-
5362
- # identity
5363
- if len (ranges ) == 0 :
5364
- return slice (0 ,len (self ))
5365
-
5366
- elif len (ranges ) == 1 :
5367
- return ranges [0 ]
5403
+ indexer = _update_indexer (_convert_to_indexer (self .get_loc_level (k ,level = i ,drop_level = False )[0 ]), indexer = indexer )
5368
5404
5369
- # construct a boolean indexer if we have a slice or boolean indexer
5370
- return reduce (np .logical_and ,[ _convert_indexer (r ) for r in ranges ])
5405
+ # empty indexer
5406
+ if indexer is None :
5407
+ return Int64Index ([]).values
5408
+ return indexer .values
5371
5409
5372
5410
def truncate (self , before = None , after = None ):
5373
5411
"""
0 commit comments