diff --git a/RELEASE.rst b/RELEASE.rst index 9147968997fc7..acb4f429e81b0 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -70,6 +70,9 @@ pandas 0.11.1 - Add ``squeeze`` keyword to ``groupby`` to allow reduction from DataFrame -> Series if groups are unique. Regression from 0.10.1, partial revert on (GH2893_) with (GH3596_) + - Raise on ``iloc`` when boolean indexing with a label based indexer mask + e.g. a boolean Series, even with integer labels, will raise. Since ``iloc`` + is purely positional based, the labels on the Series are not alignable (GH3631_) **Bug Fixes** @@ -182,6 +185,7 @@ pandas 0.11.1 .. _GH3624: https://github.com/pydata/pandas/issues/3624 .. _GH3626: https://github.com/pydata/pandas/issues/3626 .. _GH3601: https://github.com/pydata/pandas/issues/3601 +.. _GH3631: https://github.com/pydata/pandas/issues/3631 .. _GH1512: https://github.com/pydata/pandas/issues/1512 diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 55b7e653c3630..43b512a934558 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -68,7 +68,6 @@ three types of multi-axis indexing. - An integer e.g. ``5`` - A list or array of integers ``[4, 3, 0]`` - A slice object with ints ``1:7`` - - A boolean array See more at :ref:`Selection by Position <indexing.integer>` @@ -291,7 +290,6 @@ The ``.iloc`` attribute is the primary access method. The following are valid in - An integer e.g. ``5`` - A list or array of integers ``[4, 3, 0]`` - A slice object with ints ``1:7`` -- A boolean array .. ipython:: python @@ -329,12 +327,6 @@ Select via integer list df1.iloc[[1,3,5],[1,3]] -Select via boolean array - -.. ipython:: python - - df1.iloc[:,df1.iloc[0]>0] - For slicing rows explicitly (equiv to deprecated ``df.irow(slice(1,3))``). .. 
ipython:: python diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index d5256bcf26d25..aed95188db26e 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -40,6 +40,27 @@ API changes # no squeezing (the default, and behavior in 0.10.1) df2.groupby("val1").apply(func) + - Raise on ``iloc`` when boolean indexing with a label based indexer mask + e.g. a boolean Series, even with integer labels, will raise. Since ``iloc`` + is purely positional based, the labels on the Series are not alignable (GH3631_) + + This case is rarely used, and there are plenty of alternatives. This preserves the + ``iloc`` API to be *purely* positional based. + + .. ipython:: python + + df = DataFrame(range(5), list('ABCDE'), columns=['a']) + mask = (df.a%2 == 0) + mask + + # this is what you should use + df.loc[mask] + + # this will work as well + df.iloc[mask.values] + + ``df.iloc[mask]`` will raise a ``ValueError`` + Enhancements ~~~~~~~~~~~~ @@ -74,3 +95,4 @@ on GitHub for a complete list. .. _GH3435: https://github.com/pydata/pandas/issues/3435 .. _GH1512: https://github.com/pydata/pandas/issues/1512 .. _GH2285: https://github.com/pydata/pandas/issues/2285 +.. 
_GH3631: https://github.com/pydata/pandas/issues/3631 diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1cbc5abdc3ea3..02f1cf4539ac4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -775,7 +775,14 @@ class _iLocIndexer(_LocationIndexer): _exception = IndexError def _has_valid_type(self, key, axis): - return isinstance(key, slice) or com.is_integer(key) or com._is_bool_indexer(key) or _is_list_like(key) + if com._is_bool_indexer(key): + if hasattr(key,'index') and isinstance(key.index,Index): + if key.index.inferred_type == 'integer': + raise NotImplementedError("iLocation based boolean indexing on an integer type is not available") + raise ValueError("iLocation based boolean indexing cannot use an indexable as a mask") + return True + + return isinstance(key, slice) or com.is_integer(key) or _is_list_like(key) def _getitem_tuple(self, tup): @@ -811,9 +818,11 @@ def _get_slice_axis(self, slice_obj, axis=0): def _getitem_axis(self, key, axis=0): if isinstance(key, slice): + self._has_valid_type(key,axis) return self._get_slice_axis(key, axis=axis) elif com._is_bool_indexer(key): + self._has_valid_type(key,axis) return self._getbool_axis(key, axis=axis) # a single integer or a list of integers diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index f6d106f422911..d90aa369aa46e 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -888,6 +888,60 @@ def test_multi_assign(self): df2.ix[mask, cols]= dft.ix[mask, cols].values assert_frame_equal(df2,expected) + def test_iloc_mask(self): + + # GH 3631, iloc with a mask (of a series) should raise + df = DataFrame(range(5), list('ABCDE'), columns=['a']) + mask = (df.a%2 == 0) + self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) + mask.index = range(len(mask)) + self.assertRaises(NotImplementedError, df.iloc.__getitem__, tuple([mask])) + + # ndarray ok + result = df.iloc[np.array([True] * len(mask),dtype=bool)] + 
assert_frame_equal(result,df) + + # the possibilities + locs = np.arange(4) + nums = 2**locs + reps = map(bin, nums) + df = DataFrame({'locs':locs, 'nums':nums}, reps) + + expected = { + (None,'') : '0b1100', + (None,'.loc') : '0b1100', + (None,'.iloc') : '0b1100', + ('index','') : '0b11', + ('index','.loc') : '0b11', + ('index','.iloc') : 'iLocation based boolean indexing cannot use an indexable as a mask', + ('locs','') : 'Unalignable boolean Series key provided', + ('locs','.loc') : 'Unalignable boolean Series key provided', + ('locs','.iloc') : 'iLocation based boolean indexing on an integer type is not available', + } + + import warnings + warnings.filterwarnings(action='ignore', category=UserWarning) + result = dict() + for idx in [None, 'index', 'locs']: + mask = (df.nums>2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in ['', '.loc', '.iloc']: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + ans = str(bin(accessor[mask]['nums'].sum())) + except Exception, e: + ans = str(e) + + key = tuple([idx,method]) + r = expected.get(key) + if r != ans: + raise AssertionError("[%s] does not match [%s], received [%s]" % + (key,ans,r)) + warnings.filterwarnings(action='always', category=UserWarning) if __name__ == '__main__': import nose