diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index e3ee7d7c64c44..bca009c6b8931 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -77,9 +77,9 @@ of multi-axis indexing. See more at :ref:`Selection by Label <indexing.label>` - ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of - the axis), will raise ``IndexError`` if a single index is requested and it - is out-of-bounds, otherwise it will conform the bounds to size of the object. - Allowed inputs are: + the axis), will raise ``IndexError`` if an indexer is requested and it + is out-of-bounds, except *slice* indexers which allow out-of-bounds indexing. + (this conforms with python/numpy *slice* semantics). Allowed inputs are: - An integer e.g. ``5`` - A list or array of integers ``[4, 3, 0]`` @@ -421,19 +421,28 @@ python/numpy allow slicing past the end of an array without an associated error. x[4:10] x[8:10] -- as of v0.14.0, ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being +- as of v0.14.0, ``iloc`` will now accept out-of-bounds indexers for slices, e.g. a value that exceeds the length of the object being indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds - values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise - ``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned) + values. A single indexer / list of indexers that is out-of-bounds will still raise + ``IndexError`` (:issue:`6296`, :issue:`6299`). This could result in an empty axis (e.g. an empty DataFrame being returned) .. ipython:: python dfl = DataFrame(np.random.randn(5,2),columns=list('AB')) dfl - dfl.iloc[[4,5,6]] - dfl.iloc[4:6] dfl.iloc[:,2:3] dfl.iloc[:,1:3] + dfl.iloc[4:6] + +These are out-of-bounds selections + +.. 
code-block:: python + + dfl.iloc[[4,5,6]] + IndexError: positional indexers are out-of-bounds + + dfl.iloc[:,4] + IndexError: single positional indexer is out-of-bounds .. _indexing.basics.partial_setting: @@ -911,9 +920,9 @@ You can combine this with other expressions for very succinct queries: **expression itself** is evaluated in vanilla Python. For example, in the expression - .. code-block:: python + .. code-block:: python - df.query('a in b + c + d') + df.query('a in b + c + d') ``(b + c + d)`` is evaluated by ``numexpr`` and *then* the ``in`` operation is evaluated in plain Python. In general, any operations that can diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 106e0b1f1ec77..7ef95e0c0f7f4 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -15,19 +15,29 @@ Highlights include: API changes ~~~~~~~~~~~ -- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being +- ``iloc`` will now accept out-of-bounds indexers for slices, e.g. a value that exceeds the length of the object being indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds - values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise - ``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned) + values. A single indexer / list of indexers that is out-of-bounds will still raise + ``IndexError`` (:issue:`6296`, :issue:`6299`). This could result in an empty axis (e.g. an empty DataFrame being returned) - .. ipython:: python +.. ipython:: python + + dfl = DataFrame(np.random.randn(5,2),columns=list('AB')) + dfl + dfl.iloc[:,2:3] + dfl.iloc[:,1:3] + dfl.iloc[4:6] + +These are out-of-bounds selections + +.. 
code-block:: python + + dfl.iloc[[4,5,6]] + IndexError: positional indexers are out-of-bounds + + dfl.iloc[:,4] + IndexError: single positional indexer is out-of-bounds - df = DataFrame(np.random.randn(5,2),columns=list('AB')) - df - df.iloc[[4,5,6]] - df.iloc[4:6] - df.iloc[:,2:3] - df.iloc[:,1:3] - The ``DataFrame.interpolate()`` ``downcast`` keyword default has been changed from ``infer`` to ``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`). diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 830051ed41d44..40c6091df64ab 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1376,7 +1376,7 @@ def _getitem_axis(self, key, axis=0, validate_iterable=False): arr = np.array(key) l = len(ax) if len(arr) and (arr.max() >= l or arr.min() <= -l): - key = arr[(arr>-l) & (arr len(ax): - raise IndexError("single indexer is out-of-bounds") + raise IndexError("single positional indexer is out-of-bounds") return self._get_loc(key, axis=axis) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 3fb0d44529569..4eee1d3a212e0 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -961,6 +961,7 @@ def test_frame_groupby(self): assert_frame_equal(stragged, aggregated, check_names=False) # transform + grouped = self.tsframe.head(30).groupby(lambda x: x.weekday()) transformed = grouped.transform(lambda x: x - x.mean()) self.assertEqual(len(transformed), 30) self.assertEqual(len(transformed.columns), 4) @@ -2203,7 +2204,7 @@ def test_panel_groupby(self): grouped = self.panel.groupby(lambda x: x.month, axis='major') agged = grouped.mean() - self.assert_numpy_array_equal(agged.major_axis, [1, 2]) + self.assert_numpy_array_equal(agged.major_axis, sorted(list(set(self.panel.major_axis.month)))) grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, axis='minor') diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 
54cf8046b90d0..eac7430a9ee19 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -348,17 +348,24 @@ def test_iloc_exceeds_bounds(self): # iloc should allow indexers that exceed the bounds df = DataFrame(np.random.random_sample((20,5)), columns=list('ABCDE')) expected = df - result = df.iloc[:,[0,1,2,3,4,5]] - assert_frame_equal(result,expected) - result = df.iloc[[1,30]] - expected = df.iloc[[1]] - assert_frame_equal(result,expected) + # lists of positions should raise IndexError! + with tm.assertRaisesRegexp(IndexError, 'positional indexers are out-of-bounds'): + df.iloc[:,[0,1,2,3,4,5]] + self.assertRaises(IndexError, lambda : df.iloc[[1,30]]) + self.assertRaises(IndexError, lambda : df.iloc[[1,-30]]) + self.assertRaises(IndexError, lambda : df.iloc[[100]]) - result = df.iloc[[1,-30]] - expected = df.iloc[[1]] - assert_frame_equal(result,expected) + s = df['A'] + self.assertRaises(IndexError, lambda : s.iloc[[100]]) + self.assertRaises(IndexError, lambda : s.iloc[[-100]]) + # still raise on a single indexer + with tm.assertRaisesRegexp(IndexError, 'single positional indexer is out-of-bounds'): + df.iloc[30] + self.assertRaises(IndexError, lambda : df.iloc[-30]) + + # slices are ok result = df.iloc[:,4:10] expected = df.iloc[:,4:] assert_frame_equal(result,expected) @@ -367,34 +374,15 @@ def test_iloc_exceeds_bounds(self): expected = df.iloc[:,-4:] assert_frame_equal(result,expected) - result = df.iloc[[100]] - expected = DataFrame(columns=df.columns) - assert_frame_equal(result,expected) - - # still raise on a single indexer - def f(): - df.iloc[30] - self.assertRaises(IndexError, f) - - s = df['A'] - result = s.iloc[[100]] - expected = Series() - assert_series_equal(result,expected) - - result = s.iloc[[-100]] - expected = Series() - assert_series_equal(result,expected) - - # slice + # slice bounds exceeding is ok result = s.iloc[18:30] expected = s.iloc[18:] assert_series_equal(result,expected) # doc example df = 
DataFrame(np.random.randn(5,2),columns=list('AB')) - result = df.iloc[[4,5,6]] - expected = df.iloc[[4]] - assert_frame_equal(result,expected) + self.assertRaises(IndexError, lambda : df.iloc[[4,5,6]]) + self.assertRaises(IndexError, lambda : df.iloc[:,4]) result = df.iloc[4:6] expected = df.iloc[[4]] diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index b0fe3efde3260..a8dacbe40aac0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -144,6 +144,7 @@ class DatetimeIndex(Int64Index): _engine_type = _index.DatetimeEngine + tz = None offset = None _comparables = ['name','freqstr','tz'] _allow_datetime_index_ops = True