Skip to content

Commit f4364a6

Browse files
committed
BUG: #10645 in using MultiIndex.__contains__
1 parent 355b462 commit f4364a6

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -398,3 +398,4 @@ Bug Fixes
398398
- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`)
399399
- Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`)
400400

401+
- Bug in ``MultiIndex.__contains__`` that raised an ``IndexError`` instead of returning ``False`` for keys outside large multiindices (:issue:`10645`)

pandas/index.pyx

+5
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,11 @@ cdef class IndexEngine:
143143
return self._get_loc_duplicates(val)
144144
values = self._get_index_values()
145145
loc = _bin_search(values, val) # .searchsorted(val, side='left')
146+
147+
# GH10645: the binary search can return a position outside ``values``
148+
if len(values) <= loc or 0 > loc:
149+
raise KeyError(val)
150+
146151
if util.get_value_at(values, loc) != val:
147152
raise KeyError(val)
148153
return loc

pandas/tests/test_index.py

+8
Original file line numberDiff line numberDiff line change
@@ -1220,6 +1220,14 @@ def test_get_loc(self):
12201220
with tm.assertRaises(TypeError):
12211221
idx.get_loc('a', method='nearest')
12221222

1223+
def test_get_loc_keyerror(self):
1224+
# GH10645
1225+
mi = pd.MultiIndex.from_arrays([range(100), range(100)])
1226+
self.assertRaises(KeyError, lambda: mi.get_loc((1000001, 0)))
1227+
1228+
mi = pd.MultiIndex.from_arrays([range(1000000), range(1000000)])
1229+
self.assertRaises(KeyError, lambda: mi.get_loc((1000001, 0)))
1230+
12231231
def test_slice_locs(self):
12241232
for dtype in [int, float]:
12251233
idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))

0 commit comments

Comments
 (0)