From 463991b212e9ecf5bd98f67972f888011edfa777 Mon Sep 17 00:00:00 2001 From: immerrr Date: Mon, 3 Mar 2014 20:29:57 +0400 Subject: [PATCH] BUG/TST: fix several issues with slice bound checking code BUG/TST: fix handling of slice.stop < -len, obj.iloc[:-len(obj)] should be empty BUG/TST: fix exceptions raised by Series.iloc when slice.start > len CLN: remove unused _check_slice_bound function and raise_on_error params --- doc/source/release.rst | 3 +++ pandas/core/frame.py | 5 ---- pandas/core/generic.py | 10 ++++++++ pandas/core/indexing.py | 43 +++-------------------------------- pandas/core/internals.py | 12 +++------- pandas/core/panel.py | 6 ----- pandas/core/series.py | 6 ++--- pandas/sparse/frame.py | 8 ++----- pandas/sparse/panel.py | 2 +- pandas/tests/test_indexing.py | 38 ++++++++++++++++++++++++++++--- 10 files changed, 59 insertions(+), 74 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index f4f376026225e..1819272c59243 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -108,6 +108,9 @@ API Changes - Slicing and advanced/boolean indexing operations on ``Index`` classes will no longer change type of the resulting index (:issue:`6440`). - ``set_index`` no longer converts MultiIndexes to an Index of tuples (:issue:`6459`). +- Slicing with negative start, stop & step values handles corner cases better (:issue:`6531`): + - ``df.iloc[:-len(df)]`` is now empty + - ``df.iloc[len(df)::-1]`` now enumerates all elements in reverse Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 05f7785a401f8..4c02c8abab353 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1867,11 +1867,6 @@ def eval(self, expr, **kwargs): kwargs['resolvers'] = kwargs.get('resolvers', ()) + resolvers return _eval(expr, **kwargs) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): - axis = self._get_block_manager_axis(axis) - new_data = self._data.get_slice( - slobj, axis=axis, raise_on_error=raise_on_error) - return self._constructor(new_data) def _box_item_values(self, key, values): items = self.columns[self.columns.get_loc(key)] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8ca397eda17e9..120e03e9962d8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1079,6 +1079,16 @@ def _clear_item_cache(self, i=None): else: self._item_cache.clear() + def _slice(self, slobj, axis=0, typ=None): + """ + Construct a slice of this container. + + typ parameter is maintained for compatibility with Series slicing. + + """ + axis = self._get_block_manager_axis(axis) + return self._constructor(self._data.get_slice(slobj, axis=axis)) + def _set_item(self, key, value): self._data.set(key, value) self._clear_item_cache() diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c7970309a6558..e3cbddebb6643 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -91,32 +91,8 @@ def _get_label(self, label, axis=0): def _get_loc(self, key, axis=0): return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=0, raise_on_error=False, typ=None): - - # make out-of-bounds into bounds of the object - if typ == 'iloc': - ax = self.obj._get_axis(axis) - l = len(ax) - start = obj.start - stop = obj.stop - step = obj.step - if start is not None: - # degenerate to return nothing - if start >= l: - return self._getitem_axis(tuple(),axis=axis) - - # equiv to a null slice - elif start <= -l: - start = None - if stop is not None: - if stop > l: - stop = None - elif stop <= -l: - stop = None - obj = slice(start,stop,step) - - return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error, - typ=typ) + def _slice(self, obj, axis=0, typ=None): + return self.obj._slice(obj, axis=axis, typ=typ) def __setitem__(self, key, value): @@ -1343,8 +1319,7 @@ def _get_slice_axis(self, slice_obj, axis=0): return obj if isinstance(slice_obj, slice): - return self._slice(slice_obj, axis=axis, raise_on_error=True, - typ='iloc') + return self._slice(slice_obj, axis=axis, typ='iloc') else: return self.obj.take(slice_obj, axis=axis, convert=False) @@ -1647,18 +1622,6 @@ def _need_slice(obj): (obj.step is not None and obj.step != 1)) -def _check_slice_bounds(slobj, values): - l = len(values) - start = slobj.start - if start is not None: - if start < -l or start > l - 1: - raise IndexError("out-of-bounds on slice (start)") - stop = slobj.stop - if stop is not None: - if stop < -l - 1 or stop > l: - raise IndexError("out-of-bounds on slice (end)") - - def _maybe_droplevels(index, key): # drop levels original_index = index diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e3deed52f4b3f..39eb03eebdb8c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -14,8 +14,7 @@ _values_from_object, _is_null_datelike_scalar) from pandas.core.index import (Index, MultiIndex, _ensure_index, _handle_legacy_indexes) -from pandas.core.indexing import (_check_slice_bounds, _maybe_convert_indices, - _length_of_indexer) +from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer) import pandas.core.common as com from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -2669,12 +2668,9 @@ def combine(self, blocks): new_axes[0] = new_items return self.__class__(new_blocks, new_axes, do_integrity_check=False) - def get_slice(self, slobj, axis=0, raise_on_error=False): + def get_slice(self, slobj, axis=0): new_axes = list(self.axes) - if raise_on_error: - _check_slice_bounds(slobj, new_axes[axis]) - new_axes[axis] = new_axes[axis][slobj] if axis == 0: @@ -3739,9 +3735,7 @@ def _delete_from_block(self, i, item): ) self._values = self._block.values - def get_slice(self, slobj, raise_on_error=False): - if raise_on_error: - _check_slice_bounds(slobj, self.index) + def get_slice(self, slobj): return self.__class__(self._block._slice(slobj), self.index[slobj], fastpath=True) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index eba526f574375..2bf50bb1bf142 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -539,12 +539,6 @@ def _box_item_values(self, key, values): d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]) return self._constructor_sliced(values, **d) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): - new_data = self._data.get_slice(slobj, - axis=axis, - raise_on_error=raise_on_error) - return self._constructor(new_data) - def __setitem__(self, key, value): shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): diff --git a/pandas/core/series.py b/pandas/core/series.py index 9e6c0bd9305ab..4fc7ced6e8900 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,7 +28,7 @@ from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index, _handle_legacy_indexes) from pandas.core.indexing import ( - _check_bool_indexer, _check_slice_bounds, + _check_bool_indexer, _is_index_slice, _maybe_convert_indices) from pandas.core import generic, base from pandas.core.internals import SingleBlockManager @@ -469,9 +469,7 @@ def _ixs(self, i, axis=0): def _is_mixed_type(self): return False - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): - if raise_on_error: - _check_slice_bounds(slobj, self.values) + def _slice(self, slobj, axis=0, typ=None): slobj = self.index._convert_slice_indexer(slobj, typ=typ or 'getitem') return self._get_values(slobj) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 6e76155619c09..a69c07494af8a 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -13,7 +13,7 @@ from pandas.core.common import (isnull, notnull, _pickle_array, _unpickle_array, _try_sort) from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices +from pandas.core.indexing import _maybe_convert_indices from pandas.core.series import Series from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray, _default_index) @@ -379,15 +379,11 @@ def set_value(self, index, col, value, takeable=False): return dense.to_sparse(kind=self._default_kind, fill_value=self._default_fill_value) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): + def _slice(self, slobj, axis=0, typ=None): if axis == 0: - if raise_on_error: - _check_slice_bounds(slobj, self.index) new_index = self.index[slobj] new_columns = self.columns else: - if raise_on_error: - _check_slice_bounds(slobj, self.columns) new_index = self.index new_columns = self.columns[slobj] diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 86dcf97c8bd3d..20bbc58cc908f 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -187,7 +187,7 @@ def _ixs(self, i, axis=0): return self.xs(key, axis=axis) - def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): + def _slice(self, slobj, axis=0, typ=None): """ for compat as we don't support Block Manager here """ diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 1d033782a0175..325d770fb62c9 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -393,12 +393,36 @@ def test_iloc_exceeds_bounds(self): self.assertRaises(IndexError, lambda : df.iloc[-30]) # slices are ok - result = df.iloc[:,4:10] + result = df.iloc[:,4:10] # 0 < start < len < stop expected = df.iloc[:,4:] assert_frame_equal(result,expected) - result = df.iloc[:,-4:-10] - expected = df.iloc[:,-4:] + result = df.iloc[:,-4:-10] # stop < 0 < start < len + expected = df.iloc[:,:0] + assert_frame_equal(result,expected) + + result = df.iloc[:,10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:,:4:-1] + assert_frame_equal(result,expected) + + result = df.iloc[:,4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:,4::-1] + assert_frame_equal(result,expected) + + result = df.iloc[:,-10:4] # start < 0 < stop < len + expected = df.iloc[:,:4] + assert_frame_equal(result,expected) + + result = df.iloc[:,10:4] # 0 < stop < len < start + expected = df.iloc[:,:0] + assert_frame_equal(result,expected) + + result = df.iloc[:,-10:-11:-1] # stop < start < 0 < len (down) + expected = df.iloc[:,:0] + assert_frame_equal(result,expected) + + result = df.iloc[:,10:11] # 0 < len < start < stop + expected = df.iloc[:,:0] assert_frame_equal(result,expected) # slice bounds exceeding is ok @@ -406,6 +430,14 @@ def test_iloc_exceeds_bounds(self): expected = s.iloc[18:] assert_series_equal(result,expected) + result = s.iloc[30:] + expected = s.iloc[:0] + assert_series_equal(result,expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + assert_series_equal(result,expected) + # doc example def check(result,expected): str(result)