Skip to content

Commit f8225c0

Browse files
committed
Merge pull request #8003 from jreback/indexing
API: consistency in .loc indexing when no values are found in a list-like indexer (GH7999)
2 parents 70be935 + 8bf187c commit f8225c0

File tree

4 files changed

+117
-40
lines changed

4 files changed

+117
-40
lines changed

doc/source/indexing.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ Selection By Label
282282
See :ref:`Returning a View versus Copy <indexing.view_versus_copy>`
283283

284284
pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol.
285-
**ALL** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**.
285+
**At least 1** of the labels for which you ask must be in the index, or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**.
286286

287287
The ``.loc`` attribute is the primary access method. The following are valid inputs:
288288

doc/source/v0.15.0.txt

+44
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,50 @@ API changes
178178
as the ``left`` argument. (:issue:`7737`)
179179

180180
- Histogram from ``DataFrame.plot`` with ``kind='hist'`` (:issue:`7809`), See :ref:`the docs<visualization.hist>`.
181+
- Consistency when indexing with ``.loc`` and a list-like indexer when no values are found.
182+
183+
.. ipython:: python
184+
185+
df = DataFrame([['a'],['b']],index=[1,2])
186+
df
187+
188+
In prior versions there was a difference in these two constructs:
189+
190+
- ``df.loc[[3]]`` would (prior to 0.15.0) return a frame reindexed by 3 (with all ``np.nan`` values)
191+
- ``df.loc[[3],:]`` would raise ``KeyError``.
192+
193+
Both will now raise a ``KeyError``. The rule is that *at least 1* indexer must be found when using a list-like and ``.loc`` (:issue:`7999`)
194+
195+
There was also a difference between ``df.loc[[1,3]]`` (returns a frame reindexed by ``[1, 3]``) and ``df.loc[[1, 3],:]`` (would raise ``KeyError`` prior to 0.15.0). Both will now return a reindexed frame.
196+
197+
.. ipython:: python
198+
199+
df.loc[[1,3]]
200+
df.loc[[1,3],:]
201+
202+
This can also be seen in multi-axis indexing with a ``Panel``.
203+
204+
.. ipython:: python
205+
206+
p = Panel(np.arange(2*3*4).reshape(2,3,4),
207+
items=['ItemA','ItemB'],major_axis=[1,2,3],minor_axis=['A','B','C','D'])
208+
p
209+
210+
The following would raise ``KeyError`` prior to 0.15.0:
211+
212+
.. ipython:: python
213+
214+
p.loc[['ItemA','ItemD'],:,'D']
215+
216+
Furthermore, ``.loc`` will raise a ``KeyError`` if no values are found in a multi-index with a list-like indexer:
217+
218+
.. ipython:: python
219+
:okexcept:
220+
221+
s = Series(np.arange(3,dtype='int64'),index=MultiIndex.from_product([['A'],['foo','bar','baz']],
222+
names=['one','two'])).sortlevel()
223+
s
224+
s.loc[['D']]
181225

182226
.. _whatsnew_0150.dt:
183227

pandas/core/indexing.py

+36-15
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,16 @@ def _has_valid_tuple(self, key):
132132
raise ValueError("Location based indexing can only have [%s] "
133133
"types" % self._valid_types)
134134

135+
def _should_validate_iterable(self, axis=0):
136+
""" return a boolean indicating whether this axis needs validation for a passed iterable """
137+
ax = self.obj._get_axis(axis)
138+
if isinstance(ax, MultiIndex):
139+
return False
140+
elif ax.is_floating():
141+
return False
142+
143+
return True
144+
135145
def _is_nested_tuple_indexer(self, tup):
136146
if any([ isinstance(ax, MultiIndex) for ax in self.obj.axes ]):
137147
return any([ _is_nested_tuple(tup,ax) for ax in self.obj.axes ])
@@ -762,7 +772,7 @@ def _getitem_lowerdim(self, tup):
762772
# we can directly get the axis result since the axis is specified
763773
if self.axis is not None:
764774
axis = self.obj._get_axis_number(self.axis)
765-
return self._getitem_axis(tup, axis=axis, validate_iterable=True)
775+
return self._getitem_axis(tup, axis=axis)
766776

767777
# we may have a nested tuples indexer here
768778
if self._is_nested_tuple_indexer(tup):
@@ -825,7 +835,7 @@ def _getitem_nested_tuple(self, tup):
825835
return result
826836

827837
# this is a series with a multi-index specified a tuple of selectors
828-
return self._getitem_axis(tup, axis=0, validate_iterable=True)
838+
return self._getitem_axis(tup, axis=0)
829839

830840
# handle the multi-axis by taking sections and reducing
831841
# this is iterative
@@ -838,7 +848,7 @@ def _getitem_nested_tuple(self, tup):
838848
continue
839849

840850
current_ndim = obj.ndim
841-
obj = getattr(obj, self.name)._getitem_axis(key, axis=axis, validate_iterable=True)
851+
obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
842852
axis += 1
843853

844854
# if we have a scalar, we are done
@@ -859,9 +869,11 @@ def _getitem_nested_tuple(self, tup):
859869

860870
return obj
861871

862-
def _getitem_axis(self, key, axis=0, validate_iterable=False):
872+
def _getitem_axis(self, key, axis=0):
873+
874+
if self._should_validate_iterable(axis):
875+
self._has_valid_type(key, axis)
863876

864-
self._has_valid_type(key, axis)
865877
labels = self.obj._get_axis(axis)
866878
if isinstance(key, slice):
867879
return self._get_slice_axis(key, axis=axis)
@@ -888,17 +900,29 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False):
888900
return self._get_label(key, axis=axis)
889901

890902
def _getitem_iterable(self, key, axis=0):
903+
if self._should_validate_iterable(axis):
904+
self._has_valid_type(key, axis)
905+
891906
labels = self.obj._get_axis(axis)
892907

893908
def _reindex(keys, level=None):
909+
894910
try:
895-
return self.obj.reindex_axis(keys, axis=axis, level=level)
911+
result = self.obj.reindex_axis(keys, axis=axis, level=level)
896912
except AttributeError:
897913
# Series
898914
if axis != 0:
899915
raise AssertionError('axis must be 0')
900916
return self.obj.reindex(keys, level=level)
901917

918+
# this is an error as we are trying to find
919+
# keys in a multi-index that don't exist
920+
if isinstance(labels, MultiIndex) and level is not None:
921+
if hasattr(result,'ndim') and not np.prod(result.shape) and len(keys):
922+
raise KeyError("cannot index a multi-index axis with these keys")
923+
924+
return result
925+
902926
if com._is_bool_indexer(key):
903927
key = _check_bool_indexer(labels, key)
904928
inds, = key.nonzero()
@@ -1149,7 +1173,7 @@ def __getitem__(self, key):
11491173
else:
11501174
return self._getitem_axis(key, axis=0)
11511175

1152-
def _getitem_axis(self, key, axis=0, validate_iterable=False):
1176+
def _getitem_axis(self, key, axis=0):
11531177
raise NotImplementedError()
11541178

11551179
def _getbool_axis(self, key, axis=0):
@@ -1223,11 +1247,11 @@ def _has_valid_type(self, key, axis):
12231247
if isinstance(key, tuple) and isinstance(ax, MultiIndex):
12241248
return True
12251249

1226-
# require all elements in the index
1250+
# require at least 1 element in the index
12271251
idx = _ensure_index(key)
1228-
if not idx.isin(ax).all():
1252+
if len(idx) and not idx.isin(ax).any():
12291253

1230-
raise KeyError("[%s] are not in ALL in the [%s]" %
1254+
raise KeyError("None of [%s] are in the [%s]" %
12311255
(key, self.obj._get_axis_name(axis)))
12321256

12331257
return True
@@ -1256,7 +1280,7 @@ def error():
12561280

12571281
return True
12581282

1259-
def _getitem_axis(self, key, axis=0, validate_iterable=False):
1283+
def _getitem_axis(self, key, axis=0):
12601284
labels = self.obj._get_axis(axis)
12611285

12621286
if isinstance(key, slice):
@@ -1280,9 +1304,6 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False):
12801304
if hasattr(key, 'ndim') and key.ndim > 1:
12811305
raise ValueError('Cannot index with multidimensional key')
12821306

1283-
if validate_iterable:
1284-
self._has_valid_type(key, axis)
1285-
12861307
return self._getitem_iterable(key, axis=axis)
12871308

12881309
# nested tuple slicing
@@ -1389,7 +1410,7 @@ def _get_slice_axis(self, slice_obj, axis=0):
13891410
else:
13901411
return self.obj.take(slice_obj, axis=axis, convert=False)
13911412

1392-
def _getitem_axis(self, key, axis=0, validate_iterable=False):
1413+
def _getitem_axis(self, key, axis=0):
13931414

13941415
if isinstance(key, slice):
13951416
self._has_valid_type(key, axis)

pandas/tests/test_indexing.py

+36-24
Original file line numberDiff line numberDiff line change
@@ -743,11 +743,14 @@ def test_loc_getitem_label_list(self):
743743
self.check_result('list lbl', 'loc', [Timestamp('20130102'),Timestamp('20130103')], 'ix',
744744
[Timestamp('20130102'),Timestamp('20130103')], typs = ['ts'], axes=0)
745745

746-
# fails
747746
self.check_result('list lbl', 'loc', [0,1,2], 'indexer', [0,1,2], typs = ['empty'], fails = KeyError)
748747
self.check_result('list lbl', 'loc', [0,2,3], 'ix', [0,2,3], typs = ['ints'], axes=0, fails = KeyError)
749-
self.check_result('list lbl', 'loc', [3,6,7], 'ix', [3,6,9], typs = ['ints'], axes=1, fails = KeyError)
750-
self.check_result('list lbl', 'loc', [4,8,10], 'ix', [4,8,12], typs = ['ints'], axes=2, fails = KeyError)
748+
self.check_result('list lbl', 'loc', [3,6,7], 'ix', [3,6,7], typs = ['ints'], axes=1, fails = KeyError)
749+
self.check_result('list lbl', 'loc', [4,8,10], 'ix', [4,8,10], typs = ['ints'], axes=2, fails = KeyError)
750+
751+
# fails
752+
self.check_result('list lbl', 'loc', [20,30,40], 'ix', [20,30,40], typs = ['ints'], axes=1, fails = KeyError)
753+
self.check_result('list lbl', 'loc', [20,30,40], 'ix', [20,30,40], typs = ['ints'], axes=2, fails = KeyError)
751754

752755
# array like
753756
self.check_result('array like', 'loc', Series(index=[0,2,4]).index, 'ix', [0,2,4], typs = ['ints'], axes=0)
@@ -815,30 +818,35 @@ def test_loc_to_fail(self):
815818
s.loc['a'] = 2
816819

817820
self.assertRaises(KeyError, lambda : s.loc[-1])
821+
self.assertRaises(KeyError, lambda : s.loc[[-1, -2]])
818822

819-
result = s.loc[[-1, -2]]
820-
expected = Series(np.nan,index=[-1,-2])
821-
assert_series_equal(result, expected)
822-
823-
result = s.loc[['4']]
824-
expected = Series(np.nan,index=['4'])
825-
assert_series_equal(result, expected)
823+
self.assertRaises(KeyError, lambda : s.loc[['4']])
826824

827825
s.loc[-1] = 3
828826
result = s.loc[[-1,-2]]
829827
expected = Series([3,np.nan],index=[-1,-2])
830828
assert_series_equal(result, expected)
831829

832830
s['a'] = 2
833-
result = s.loc[[-2]]
834-
expected = Series([np.nan],index=[-2])
835-
assert_series_equal(result, expected)
831+
self.assertRaises(KeyError, lambda : s.loc[[-2]])
836832

837833
del s['a']
838834
def f():
839835
s.loc[[-2]] = 0
840836
self.assertRaises(KeyError, f)
841837

838+
# inconsistency between .loc[values] and .loc[values,:]
839+
# GH 7999
840+
df = DataFrame([['a'],['b']],index=[1,2],columns=['value'])
841+
842+
def f():
843+
df.loc[[3],:]
844+
self.assertRaises(KeyError, f)
845+
846+
def f():
847+
df.loc[[3]]
848+
self.assertRaises(KeyError, f)
849+
842850
def test_loc_getitem_label_slice(self):
843851

844852
# label slices (with ints)
@@ -1575,11 +1583,13 @@ def f():
15751583
self.assertRaises(ValueError, f)
15761584

15771585
# ambiguous cases
1578-
# these can be multiply interpreted
1579-
# but we can catch this in some cases
1580-
def f():
1581-
df.loc[(slice(None),[1])]
1582-
self.assertRaises(KeyError, f)
1586+
# these can be multiply interpreted (e.g. in this case
1587+
# as df.loc[slice(None),[1]] as well)
1588+
self.assertRaises(KeyError, lambda : df.loc[slice(None),[1]])
1589+
1590+
result = df.loc[(slice(None),[1]),:]
1591+
expected = df.iloc[[0,3]]
1592+
assert_frame_equal(result, expected)
15831593

15841594
# not lexsorted
15851595
self.assertEqual(df.index.lexsort_depth,2)
@@ -1960,9 +1970,12 @@ def f():
19601970
result = s.loc[['A','D']]
19611971
assert_series_equal(result,expected)
19621972

1963-
# empty series
1964-
result = s.loc[['D']]
1965-
expected = s.loc[[]]
1973+
# not any values found
1974+
self.assertRaises(KeyError, lambda : s.loc[['D']])
1975+
1976+
# empty ok
1977+
result = s.loc[[]]
1978+
expected = s.iloc[[]]
19661979
assert_series_equal(result,expected)
19671980

19681981
idx = pd.IndexSlice
@@ -2788,9 +2801,8 @@ def test_series_partial_set(self):
27882801
result = ser.loc[[3, 2, 3]]
27892802
assert_series_equal(result, expected)
27902803

2791-
expected = Series([np.nan, np.nan, np.nan], index=[3, 3, 3])
2792-
result = ser.loc[[3, 3, 3]]
2793-
assert_series_equal(result, expected)
2804+
# raises as nothing is in the index
2805+
self.assertRaises(KeyError, lambda : ser.loc[[3, 3, 3]])
27942806

27952807
expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3])
27962808
result = ser.loc[[2, 2, 3]]

0 commit comments

Comments
 (0)