@@ -44,18 +44,27 @@ def get_value_at(ndarray arr, object loc):
44
44
def set_value_at(ndarray arr, object loc, object val):
    """
    Forward ``(arr, loc, val)`` to ``util.set_value_at`` and return its result.

    This is a thin Python-visible wrapper; all of the work happens in the
    ``util`` helper.
    NOTE(review): presumably this stores ``val`` at position ``loc`` of
    ``arr`` -- confirm against ``util.set_value_at``.
    """
    result = util.set_value_at(arr, loc, val)
    return result
46
46
47
+
48
+ # Don't populate hash tables in monotonic indexes with at least this many elements (the engine tests len(index) >= _SIZE_CUTOFF)
49
+ cdef int _SIZE_CUTOFF = 1000000
50
+
51
+
47
52
cdef class IndexEngine:
48
53
49
54
cdef readonly:
50
55
object index_weakref
51
56
HashTable mapping
57
+ bint over_size_threshold
52
58
53
59
cdef:
54
60
bint unique, monotonic
55
61
bint initialized, monotonic_check, unique_check
56
62
57
63
def __init__ (self , index_weakref ):
58
64
self .index_weakref = index_weakref
65
+
66
+ self .over_size_threshold = len (index_weakref()) >= _SIZE_CUTOFF
67
+
59
68
self .initialized = 0
60
69
self .monotonic_check = 0
61
70
@@ -101,6 +110,15 @@ cdef class IndexEngine:
101
110
if is_definitely_invalid_key(val):
102
111
raise TypeError
103
112
113
+ if self .over_size_threshold and self .is_monotonic:
114
+ if not self .is_unique:
115
+ return self ._get_loc_duplicates(val)
116
+ values = self ._get_index_values()
117
+ loc = values.searchsorted(val, side = ' left' )
118
+ if util.get_value_at(values, loc) != val:
119
+ raise KeyError (val)
120
+ return loc
121
+
104
122
self ._ensure_mapping_populated()
105
123
if not self .unique:
106
124
return self ._get_loc_duplicates(val)
@@ -337,19 +355,17 @@ cdef class ObjectEngine(IndexEngine):
337
355
338
356
cdef class DatetimeEngine(Int64Engine):
339
357
340
- # cdef Int64HashTable mapping
341
-
342
358
def __contains__(self, object val):
    """
    Report whether ``val`` is present among the index values.

    ``val`` is first normalised with ``_to_i8`` so that it can be compared
    with the ``'i8'`` view returned by ``_get_index_values``.

    When the index is monotonic and ``over_size_threshold`` is set (length
    >= ``_SIZE_CUTOFF``), membership is decided with a binary search on the
    values, so the hash table is never populated.  Otherwise the key is
    looked up in ``self.mapping``.

    Parameters
    ----------
    val : object
        Lookup key; datetime64 scalars and datetime objects are converted
        by ``_to_i8``, anything else is used as given.

    Returns
    -------
    True if the key is present, False otherwise.
    """
    conv = _to_i8(val)

    if self.over_size_threshold and self.is_monotonic:
        # Membership in a sorted array does not depend on uniqueness:
        # side='left' yields the first slot whose value is >= conv, so a
        # single comparison answers the question with or without
        # duplicates.  (A location lookup such as _get_loc_duplicates is
        # the wrong tool here: __contains__ must yield a truth value.)
        values = self._get_index_values()
        loc = values.searchsorted(conv, side='left')

        # searchsorted returns len(values) when conv sorts after every
        # element; that position must not be read.
        if loc >= len(values):
            return False
        return util.get_value_at(values, loc) == conv

    self._ensure_mapping_populated()
    return conv in self.mapping
353
369
354
370
cdef _get_index_values(self):
    # Dereference the weak reference to the owning index and return its
    # ``values`` reinterpreted through ``.view('i8')``.
    # The dtype literal must be exactly 'i8' (no surrounding whitespace) to
    # be a valid NumPy dtype string.
    return self.index_weakref().values.view('i8')
@@ -363,13 +379,19 @@ cdef class DatetimeEngine(Int64Engine):
363
379
364
380
# Welcome to the spaghetti factory
365
381
382
+ if self .over_size_threshold and self .is_monotonic:
383
+ if not self .is_unique:
384
+ return self ._get_loc_duplicates(val)
385
+ values = self ._get_index_values()
386
+ conv = _to_i8(val)
387
+ loc = values.searchsorted(conv, side = ' left' )
388
+ if util.get_value_at(values, loc) != conv:
389
+ raise KeyError (val)
390
+ return loc
391
+
366
392
self ._ensure_mapping_populated()
367
393
if not self .unique:
368
- if util.is_datetime64_object(val):
369
- val = val.view(' i8' )
370
- elif PyDateTime_Check(val):
371
- val = np.datetime64(val)
372
- val = val.view(' i8' )
394
+ val = _to_i8(val)
373
395
return self ._get_loc_duplicates(val)
374
396
375
397
try :
@@ -380,11 +402,7 @@ cdef class DatetimeEngine(Int64Engine):
380
402
pass
381
403
382
404
try :
383
- if util.is_datetime64_object(val):
384
- val = val.view(' i8' )
385
- elif PyDateTime_Check(val):
386
- val = np.datetime64(val)
387
- val = val.view(' i8' )
405
+ val = _to_i8(val)
388
406
return self .mapping.get_item(val)
389
407
except TypeError :
390
408
self ._date_check_type(val)
@@ -417,6 +435,14 @@ cdef class DatetimeEngine(Int64Engine):
417
435
limit = limit)
418
436
419
437
438
cdef inline _to_i8(object val):
    # Normalise a lookup key for comparison against the engine's values:
    #   * objects recognised by util.is_datetime64_object are passed straight
    #     to unbox_datetime64_scalar;
    #   * objects accepted by PyDateTime_Check are wrapped in np.datetime64
    #     first and then unboxed the same way;
    #   * every other object is handed back untouched.
    # NOTE(review): unbox_datetime64_scalar presumably yields the scalar's
    # int64 representation -- confirm against its definition.
    if util.is_datetime64_object(val):
        return unbox_datetime64_scalar(val)
    if PyDateTime_Check(val):
        return unbox_datetime64_scalar(np.datetime64(val))
    return val
445
+
420
446
# ctypedef fused idxvalue_t:
421
447
# object
422
448
# int
0 commit comments