Skip to content

Commit 1320ef7

Browse files
gliptak authored and jreback committed
BUG: Correct out-of-bounds error with large indices
closes pandas-dev#12527 Author: Gábor Lipták <[email protected]> Closes pandas-dev#12921 from gliptak/keyerror1 and squashes the following commits: 02ca451 [Gábor Lipták] Correct out-of-bounds error with large indices
1 parent fe8f8f4 commit 1320ef7

File tree

4 files changed

+21
-2
lines changed

4 files changed

+21
-2
lines changed

doc/source/whatsnew/v0.18.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ Bug Fixes
286286
- Bug in ``.quantile()`` with empty Series may return scalar rather than empty Series (:issue:`12772`)
287287

288288

289-
289+
- Bug in ``.loc`` where looking up an out-of-bounds key in a large index would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`)
290290
- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`)
291291
- Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`)
292292

pandas/index.pyx

+2
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ cdef class IndexEngine:
143143
return self._get_loc_duplicates(val)
144144
values = self._get_index_values()
145145
loc = _bin_search(values, val) # .searchsorted(val, side='left')
146+
if loc >= len(values):
147+
raise KeyError(val)
146148
if util.get_value_at(values, loc) != val:
147149
raise KeyError(val)
148150
return loc

pandas/indexes/multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,7 @@ def to_hierarchical(self, n_repeat, n_shuffle=1):
769769
levels = self.levels
770770
labels = [np.repeat(x, n_repeat) for x in self.labels]
771771
# Assumes that each label is divisible by n_shuffle
772-
labels = [x.reshape(n_shuffle, -1).ravel(1) for x in labels]
772+
labels = [x.reshape(n_shuffle, -1).ravel('F') for x in labels]
773773
names = self.names
774774
return MultiIndex(levels=levels, labels=labels, names=names)
775775

pandas/tests/indexes/test_multi.py

+17
Original file line numberDiff line numberDiff line change
@@ -2052,6 +2052,23 @@ def test_equals_operator(self):
20522052
# GH9785
20532053
self.assertTrue((self.index == self.index).all())
20542054

2055+
def test_large_multiindex_error(self):
2056+
# GH12527
2057+
df_below_1000000 = pd.DataFrame(
2058+
1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
2059+
columns=['dest'])
2060+
with assertRaises(KeyError):
2061+
df_below_1000000.loc[(-1, 0), 'dest']
2062+
with assertRaises(KeyError):
2063+
df_below_1000000.loc[(3, 0), 'dest']
2064+
df_above_1000000 = pd.DataFrame(
2065+
1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
2066+
columns=['dest'])
2067+
with assertRaises(KeyError):
2068+
df_above_1000000.loc[(-1, 0), 'dest']
2069+
with assertRaises(KeyError):
2070+
df_above_1000000.loc[(3, 0), 'dest']
2071+
20552072
def test_partial_string_timestamp_multiindex(self):
20562073
# GH10331
20572074
dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H')

0 commit comments

Comments
 (0)