diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 0b6f6522dfde0..1626ecbf98e80 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -1033,7 +1033,7 @@ Bug Fixes - Bug in ``Index.take`` may add unnecessary ``freq`` attribute (:issue:`10791`) - Bug in ``merge`` with empty ``DataFrame`` may raise ``IndexError`` (:issue:`10824`) - Bug in ``to_latex`` where unexpected keyword argument for some documented arguments (:issue:`10888`) - +- Bug in indexing of large ``DataFrame`` where ``IndexError`` is uncaught (:issue:`10645` and :issue:`10692`) - Bug in ``read_csv`` when using the ``nrows`` or ``chunksize`` parameters if file contains only a header line (:issue:`9535`) - Bug in serialization of ``category`` types in HDF5 in presence of alternate encodings. (:issue:`10366`) - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) diff --git a/pandas/core/index.py b/pandas/core/index.py index 025b8c9d0e250..c60509f00ebac 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -4727,7 +4727,7 @@ def __contains__(self, key): try: self.get_loc(key) return True - except KeyError: + except LookupError: return False def __reduce__(self): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 71bba3a9edea2..8b4528ef451ef 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1040,7 +1040,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): # if we are a label return me try: return labels.get_loc(obj) - except KeyError: + except LookupError: if isinstance(obj, tuple) and isinstance(labels, MultiIndex): if is_setter and len(obj) == labels.nlevels: return {'key': obj} @@ -1125,7 +1125,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): else: try: return labels.get_loc(obj) - except KeyError: + except LookupError: # allow a not found key only if we are a setter if not is_list_like_indexer(obj) and is_setter: 
return {'key': obj}
diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
index c48807365913c..acffbd12d1c9a 100644
--- a/pandas/tests/test_indexing.py
+++ b/pandas/tests/test_indexing.py
@@ -4602,7 +4602,17 @@ def test_indexing_dtypes_on_empty(self):
         assert_series_equal(df2.loc[:,'a'], df2.iloc[:,0])
         assert_series_equal(df2.loc[:,'a'], df2.ix[:,0])
 
+    def test_large_dataframe_indexing(self):
+        # GH10692: appending a row via .loc to a large frame must not raise
+        result = DataFrame({'x': range(10**6)})
+        result.loc[len(result)] = len(result)
+        expected = DataFrame({'x': range(10**6 + 1)})
+        assert_frame_equal(result, expected)
 
+    def test_large_mi_dataframe_indexing(self):
+        # GH10645: an out-of-range key in a large MultiIndex is simply absent
+        result = MultiIndex.from_arrays([range(10**6), range(10**6)])
+        assert (10**6, 0) not in result
 
 class TestCategoricalIndex(tm.TestCase):
 