Skip to content

PERF: do not check for label presence preventively #21594

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Other Enhancements
- :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`)
- :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`)
- :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`)
- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`)
- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`)


.. _whatsnew_0240.api_breaking:
Expand Down Expand Up @@ -199,6 +199,7 @@ Indexing
^^^^^^^^

- The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`)
- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError`` - consistently with the case of a flat :class:`Int64Index` - rather than falling back to positional indexing (:issue:`21593`)
-
-

Expand Down
36 changes: 5 additions & 31 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
is_iterator,
is_scalar,
is_sparse,
_is_unorderable_exception,
_ensure_platform_int)
from pandas.core.dtypes.missing import isna, _infer_fill_value
from pandas.errors import AbstractMethodError
Expand Down Expand Up @@ -139,10 +138,7 @@ def _get_label(self, label, axis=None):
# as it's basically direct indexing
# but will fail when the index is not present
# see GH5667
try:
return self.obj._xs(label, axis=axis)
except:
return self.obj[label]
return self.obj._xs(label, axis=axis)
elif isinstance(label, tuple) and isinstance(label[axis], slice):
raise IndexingError('no slices here, handle elsewhere')

Expand Down Expand Up @@ -1797,42 +1793,20 @@ class _LocIndexer(_LocationIndexer):

@Appender(_NDFrameIndexer._validate_key.__doc__)
def _validate_key(self, key, axis):
ax = self.obj._get_axis(axis)

# valid for a label where all labels are in the index
# valid for a collection of labels (we check their presence later)
# slice of labels (where start-end in labels)
# slice of integers (only if in the labels)
# boolean

if isinstance(key, slice):
return

elif com.is_bool_indexer(key):
if com.is_bool_indexer(key):
return

elif not is_list_like_indexer(key):

def error():
if isna(key):
raise TypeError("cannot use label indexing with a null "
"key")
raise KeyError(u"the label [{key}] is not in the [{axis}]"
.format(key=key,
axis=self.obj._get_axis_name(axis)))

try:
key = self._convert_scalar_indexer(key, axis)
except TypeError as e:

# python 3 type errors should be raised
if _is_unorderable_exception(e):
error()
raise
except:
error()

if not ax.contains(key):
error()
if not is_list_like_indexer(key):
self._convert_scalar_indexer(key, axis)

def _is_scalar_access(self, key):
# this is a shortcut accessor to both .loc and .iloc
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/indexes/datetimes/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
date_range, Index, Timedelta, Timestamp)
from pandas.util import testing as tm

from pandas.core.indexing import IndexingError


class TestSlicing(object):
def test_dti_slicing(self):
Expand Down Expand Up @@ -313,12 +315,12 @@ def test_partial_slicing_with_multiindex(self):
result = df_multi.loc[('2013-06-19 09:30:00', 'ACCT1', 'ABC')]
tm.assert_series_equal(result, expected)

# this is a KeyError as we don't do partial string selection on
# multi-levels
# this is an IndexingError as we don't do partial string selection on
# multi-levels.
def f():
df_multi.loc[('2013-06-19', 'ACCT1', 'ABC')]

pytest.raises(KeyError, f)
pytest.raises(IndexingError, f)

# GH 4294
# partial slice on a series mi
Expand Down
9 changes: 8 additions & 1 deletion pandas/tests/indexing/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ def test_iloc_getitem_multiindex(self):
# corner column
rs = mi_int.iloc[2, 2]
with catch_warnings(record=True):
xp = mi_int.ix[:, 2].ix[2]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not that we care too much, but this case looks slightly different from the one below (which tests .ix) - is that intentional?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, because as long as the first level is non-int, .ix is fine. Adding a comment.

# First level is int - so use .loc rather than .ix (GH 21593)
xp = mi_int.loc[(8, 12), (4, 10)]
assert rs == xp

# this is basically regular indexing
Expand Down Expand Up @@ -278,6 +279,12 @@ def test_loc_multiindex(self):
xp = mi_int.ix[4]
tm.assert_frame_equal(rs, xp)

# missing label
pytest.raises(KeyError, lambda: mi_int.loc[2])
with catch_warnings(record=True):
# GH 21593
pytest.raises(KeyError, lambda: mi_int.ix[2])

def test_getitem_partial_int(self):
# GH 12416
# with single item
Expand Down