Skip to content

Commit 8eaf19a

Browse files
committed
Merge pull request #3635 from jreback/iloc_mask
API: Raise on iloc indexing with a non-integer based boolean mask (GH3631)
2 parents 83a6e09 + d2a80a8 commit 8eaf19a

File tree

5 files changed

+90
-9
lines changed

5 files changed

+90
-9
lines changed

RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ pandas 0.11.1
7070
- Add ``squeeze`` keyword to ``groupby`` to allow reduction from
7171
DataFrame -> Series if groups are unique. Regression from 0.10.1,
7272
partial revert on (GH2893_) with (GH3596_)
73+
- Raise on ``iloc`` when boolean indexing with a label based indexer mask
74+
(e.g. a boolean Series, even one with integer labels). Since ``iloc``
75+
is purely positional based, the labels on the Series are not alignable (GH3631_)
7376

7477
**Bug Fixes**
7578

@@ -182,6 +185,7 @@ pandas 0.11.1
182185
.. _GH3624: https://github.com/pydata/pandas/issues/3624
183186
.. _GH3626: https://github.com/pydata/pandas/issues/3626
184187
.. _GH3601: https://github.com/pydata/pandas/issues/3601
188+
.. _GH3631: https://github.com/pydata/pandas/issues/3631
185189
.. _GH1512: https://github.com/pydata/pandas/issues/1512
186190

187191

doc/source/indexing.rst

-8
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ three types of multi-axis indexing.
6868
- An integer e.g. ``5``
6969
- A list or array of integers ``[4, 3, 0]``
7070
- A slice object with ints ``1:7``
71-
- A boolean array
7271

7372
See more at :ref:`Selection by Position <indexing.integer>`
7473

@@ -291,7 +290,6 @@ The ``.iloc`` attribute is the primary access method. The following are valid in
291290
- An integer e.g. ``5``
292291
- A list or array of integers ``[4, 3, 0]``
293292
- A slice object with ints ``1:7``
294-
- A boolean array
295293

296294
.. ipython:: python
297295
@@ -329,12 +327,6 @@ Select via integer list
329327
330328
df1.iloc[[1,3,5],[1,3]]
331329
332-
Select via boolean array
333-
334-
.. ipython:: python
335-
336-
df1.iloc[:,df1.iloc[0]>0]
337-
338330
For slicing rows explicitly (equiv to deprecated ``df.irow(slice(1,3))``).
339331

340332
.. ipython:: python

doc/source/v0.11.1.txt

+22
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,27 @@ API changes
4040
# no squeezing (the default, and behavior in 0.10.1)
4141
df2.groupby("val1").apply(func)
4242

43+
- Raise on ``iloc`` when boolean indexing with a label based indexer mask
44+
(e.g. a boolean Series, even one with integer labels). Since ``iloc``
45+
is purely positional based, the labels on the Series are not alignable (GH3631_)
46+
47+
This case is rarely used, and there are plenty of alternatives. This preserves the
48+
``iloc`` API to be *purely* positional based.
49+
50+
.. ipython:: python
51+
52+
df = DataFrame(range(5), list('ABCDE'), columns=['a'])
53+
mask = (df.a%2 == 0)
54+
mask
55+
56+
# this is what you should use
57+
df.loc[mask]
58+
59+
# this will work as well
60+
df.iloc[mask.values]
61+
62+
``df.iloc[mask]`` will raise a ``ValueError``
63+
4364

4465
Enhancements
4566
~~~~~~~~~~~~
@@ -74,3 +95,4 @@ on GitHub for a complete list.
7495
.. _GH3435: https://github.com/pydata/pandas/issues/3435
7596
.. _GH1512: https://github.com/pydata/pandas/issues/1512
7697
.. _GH2285: https://github.com/pydata/pandas/issues/2285
98+
.. _GH3631: https://github.com/pydata/pandas/issues/3631

pandas/core/indexing.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,14 @@ class _iLocIndexer(_LocationIndexer):
775775
_exception = IndexError
776776

777777
def _has_valid_type(self, key, axis):
778-
return isinstance(key, slice) or com.is_integer(key) or com._is_bool_indexer(key) or _is_list_like(key)
778+
if com._is_bool_indexer(key):
779+
if hasattr(key,'index') and isinstance(key.index,Index):
780+
if key.index.inferred_type == 'integer':
781+
raise NotImplementedError("iLocation based boolean indexing on an integer type is not available")
782+
raise ValueError("iLocation based boolean indexing cannot use an indexable as a mask")
783+
return True
784+
785+
return isinstance(key, slice) or com.is_integer(key) or _is_list_like(key)
779786

780787
def _getitem_tuple(self, tup):
781788

@@ -811,9 +818,11 @@ def _get_slice_axis(self, slice_obj, axis=0):
811818
def _getitem_axis(self, key, axis=0):
812819

813820
if isinstance(key, slice):
821+
self._has_valid_type(key,axis)
814822
return self._get_slice_axis(key, axis=axis)
815823

816824
elif com._is_bool_indexer(key):
825+
self._has_valid_type(key,axis)
817826
return self._getbool_axis(key, axis=axis)
818827

819828
# a single integer or a list of integers

pandas/tests/test_indexing.py

+54
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,60 @@ def test_multi_assign(self):
888888
df2.ix[mask, cols]= dft.ix[mask, cols].values
889889
assert_frame_equal(df2,expected)
890890

891+
def test_iloc_mask(self):
892+
893+
# GH 3631, iloc with a mask (of a series) should raise
894+
df = DataFrame(range(5), list('ABCDE'), columns=['a'])
895+
mask = (df.a%2 == 0)
896+
self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask]))
897+
mask.index = range(len(mask))
898+
self.assertRaises(NotImplementedError, df.iloc.__getitem__, tuple([mask]))
899+
900+
# ndarray ok
901+
result = df.iloc[np.array([True] * len(mask),dtype=bool)]
902+
assert_frame_equal(result,df)
903+
904+
# the possibilities
905+
locs = np.arange(4)
906+
nums = 2**locs
907+
reps = map(bin, nums)
908+
df = DataFrame({'locs':locs, 'nums':nums}, reps)
909+
910+
expected = {
911+
(None,'') : '0b1100',
912+
(None,'.loc') : '0b1100',
913+
(None,'.iloc') : '0b1100',
914+
('index','') : '0b11',
915+
('index','.loc') : '0b11',
916+
('index','.iloc') : 'iLocation based boolean indexing cannot use an indexable as a mask',
917+
('locs','') : 'Unalignable boolean Series key provided',
918+
('locs','.loc') : 'Unalignable boolean Series key provided',
919+
('locs','.iloc') : 'iLocation based boolean indexing on an integer type is not available',
920+
}
921+
922+
import warnings
923+
warnings.filterwarnings(action='ignore', category=UserWarning)
924+
result = dict()
925+
for idx in [None, 'index', 'locs']:
926+
mask = (df.nums>2).values
927+
if idx:
928+
mask = Series(mask, list(reversed(getattr(df, idx))))
929+
for method in ['', '.loc', '.iloc']:
930+
try:
931+
if method:
932+
accessor = getattr(df, method[1:])
933+
else:
934+
accessor = df
935+
ans = str(bin(accessor[mask]['nums'].sum()))
936+
except Exception, e:
937+
ans = str(e)
938+
939+
key = tuple([idx,method])
940+
r = expected.get(key)
941+
if r != ans:
942+
raise AssertionError("[%s] does not match [%s], received [%s]" %
943+
(key,ans,r))
944+
warnings.filterwarnings(action='always', category=UserWarning)
891945

892946
if __name__ == '__main__':
893947
import nose

0 commit comments

Comments
 (0)