Skip to content

Commit 52db085

Browse files
jbrockmendel authored and phofl committed
REF/PERF: dont use hashtable in IndexEngine.__contains__ (pandas-dev#45192)
1 parent ebd2049 commit 52db085

File tree

4 files changed

+13
-24
lines changed

4 files changed

+13
-24
lines changed

pandas/_libs/index.pyx

+8 -5
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,12 @@ cdef class IndexEngine:
128128
self._np_type = values.dtype.type
129129

130130
def __contains__(self, val: object) -> bool:
131-
# We assume before we get here:
132-
# - val is hashable
133-
self._ensure_mapping_populated()
134-
return val in self.mapping
131+
hash(val)
132+
try:
133+
self.get_loc(val)
134+
except KeyError:
135+
return False
136+
return True
135137

136138
cpdef get_loc(self, object val):
137139
# -> Py_ssize_t | slice | ndarray[bool]
@@ -141,7 +143,7 @@ cdef class IndexEngine:
141143
if is_definitely_invalid_key(val):
142144
raise TypeError(f"'{val}' is an invalid key")
143145

144-
self._check_type(val)
146+
val = self._check_type(val)
145147

146148
if self.over_size_threshold and self.is_monotonic_increasing:
147149
if not self.is_unique:
@@ -270,6 +272,7 @@ cdef class IndexEngine:
270272

271273
cdef _check_type(self, object val):
272274
hash(val)
275+
return val
273276

274277
@property
275278
def is_mapping_populated(self) -> bool:

pandas/_libs/index_class_helper.pxi.in

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ cdef class {{name}}Engine(IndexEngine):
3535
cdef _check_type(self, object val):
3636
{{if name not in {'Float64', 'Float32'} }}
3737
if not util.is_integer_object(val):
38+
if util.is_float_object(val):
39+
# Make sure Int64Index.get_loc(2.0) works
40+
if val.is_integer():
41+
return int(val)
3842
raise KeyError(val)
3943
{{if name.startswith("U")}}
4044
if val < 0:
@@ -46,6 +50,7 @@ cdef class {{name}}Engine(IndexEngine):
4650
# in particular catch bool and avoid casting True -> 1.0
4751
raise KeyError(val)
4852
{{endif}}
53+
return val
4954

5055

5156
{{endfor}}

pandas/core/indexes/base.py

-2
Original file line numberDiff line numberDiff line change
@@ -6422,8 +6422,6 @@ def _maybe_cast_indexer(self, key):
64226422
If we have a float key and are not a floating index, then try to cast
64236423
to an int if equivalent.
64246424
"""
6425-
if not self.is_floating():
6426-
return com.cast_scalar_indexer(key)
64276425
return key
64286426

64296427
def _maybe_cast_listlike_indexer(self, target) -> Index:

pandas/core/indexes/numeric.py

-17
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
from pandas.core.dtypes.common import (
2727
is_dtype_equal,
28-
is_float,
2928
is_float_dtype,
3029
is_integer_dtype,
3130
is_numeric_dtype,
@@ -213,22 +212,6 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
213212
# dtype for Int64Index, UInt64Index etc. Needed for backwards compat.
214213
return cls._default_dtype
215214

216-
def __contains__(self, key) -> bool:
217-
"""
218-
Check if key is a float and has a decimal. If it has, return False.
219-
"""
220-
if not is_integer_dtype(self.dtype):
221-
return super().__contains__(key)
222-
223-
hash(key)
224-
try:
225-
if is_float(key) and int(key) != key:
226-
# otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
227-
return False
228-
return key in self._engine
229-
except (OverflowError, TypeError, ValueError):
230-
return False
231-
232215
# ----------------------------------------------------------------
233216
# Indexing Methods
234217

0 commit comments

Comments
 (0)