From d6774a7cc711c229f260513a77cb9388cc6c158f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 28 Feb 2015 10:59:45 -0500 Subject: [PATCH 1/5] API: consistency with .ix and .loc for getitem operations (GH8613) raise TypeError rather than KeyError on invalid scalar/slice indexing with that index type --- doc/source/indexing.rst | 33 ++++++- doc/source/whatsnew/v0.16.0.txt | 60 ++++++++++++ pandas/core/index.py | 63 +++++++++++-- pandas/core/indexing.py | 20 +--- pandas/core/series.py | 2 +- pandas/tests/test_index.py | 22 ++++- pandas/tests/test_indexing.py | 160 +++++++++++++++++++++++--------- pandas/tseries/base.py | 8 ++ pandas/tseries/index.py | 18 ++-- pandas/tseries/period.py | 11 ++- pandas/tseries/tdi.py | 16 ++-- 11 files changed, 316 insertions(+), 97 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 5ab72f633f49b..846285ef42969 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -85,7 +85,7 @@ of multi-axis indexing. - ``.iloc`` is primarily integer position based (from ``0`` to ``length-1`` of the axis), but may also be used with a boolean - array. ``.iloc`` will raise ``IndexError`` if a requested + array. ``.iloc`` will raise ``IndexError`` if a requested indexer is out-of-bounds, except *slice* indexers which allow out-of-bounds indexing. (this conforms with python/numpy *slice* semantics). Allowed inputs are: @@ -292,6 +292,35 @@ Selection By Label This is sometimes called ``chained assignment`` and should be avoided. See :ref:`Returning a View versus Copy ` +.. warning:: + + ``.loc`` is strict when you present slicers that are not compatible (or convertible) with the index type. For example + using integers in a ``DatetimeIndex`` or float indexers in an ``Int64Index``. These will raise a ``TypeError``. + + .. ipython:: python + + dfl = DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=date_range('20130101',periods=5)) + dfl + sl = Series(range(5),[-2,-1,1,2,3]) + sl + + .. 
code-block:: python + + In [4]: dfl.loc[2:3] + TypeError: cannot do slice indexing on <class 'pandas.tseries.index.DatetimeIndex'> with these indexers [2] of <type 'int'> + + .. code-block:: python + + In [8]: sl.loc[-1.0:2] + TypeError: cannot do slice indexing on <class 'pandas.core.index.Int64Index'> with these indexers [-1.0] of <type 'float'> + + + String likes in slicing *can* be convertible to the type of the index and lead to natural slicing. + + .. ipython:: python + + dfl.loc['20130102':'20130104'] + pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. **at least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**. @@ -1486,5 +1515,3 @@ This will **not** work at all, and so should be avoided The chained assignment warnings / exceptions are aiming to inform the user of a possibly invalid assignment. There may be false positives; situations where a chained assignment is inadvertantly reported. - - diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 1550527706a9e..1fcb60c866d07 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -211,6 +211,66 @@ Backwards incompatible API changes p // 0 +Indexing Changes +~~~~~~~~~~~~~~~~ + +.. _whatsnew_0160.api_breaking.indexing: + +The behavior of a small sub-set of edge cases for using ``.loc`` has changed (:issue:`8613`). Furthermore we have improved the content of the error messages that are raised: + +- slicing with ``.loc`` where the start and/or stop bound is not found in the index is now allowed; this previously would raise a ``KeyError``. This makes the behavior the same as ``.ix`` in this case. This change is only for slicing, not when indexing with a single label. + + ..
ipython:: python + + df = DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=date_range('20130101',periods=5)) + df + s = Series(range(5),[-2,-1,1,2,3]) + s + + Previous Behavior + + .. code-block:: python + + In [4]: df.loc['2013-01-02':'2013-01-10'] + KeyError: 'stop bound [2013-01-10] is not in the [index]' + + In [6]: s.loc[-10:3] + KeyError: 'start bound [-10] is not the [index]' + + In [8]: s.loc[-1.0:2] + Out[2]: + -1 1 + 1 2 + 2 3 + dtype: int64 + + New Behavior + + .. ipython:: python + + df.loc['2013-01-02':'2013-01-10'] + s.loc[-10:3] + + .. code-block:: python + + In [8]: s.loc[-1.0:2] + TypeError: cannot do slice indexing on <class 'pandas.core.index.Int64Index'> with these indexers [-1.0] of <type 'float'> + +- provide a useful exception for indexing with an invalid type for that index when using ``.loc``. For example trying to use ``.loc`` on an index of type ``DatetimeIndex`` or ``PeriodIndex`` or ``TimedeltaIndex``, with an integer (or a float). + + Previous Behavior + + .. code-block:: python + + In [4]: df.loc[2:3] + KeyError: 'start bound [2] is not the [index]' + + New Behavior + + ..
code-block:: python + + In [4]: df.loc[2:3] + TypeError: cannot do slice indexing on <class 'pandas.tseries.index.DatetimeIndex'> with these indexers [2] of <type 'int'> Deprecations ~~~~~~~~~~~~ diff --git a/pandas/core/index.py b/pandas/core/index.py index 0cad537855857..94fe13124dbd6 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -640,7 +640,7 @@ def _convert_scalar_indexer(self, key, typ=None): def to_int(): ikey = int(key) if ikey != key: - return self._convert_indexer_error(key, 'label') + return self._invalid_indexer('label', key) return ikey if typ == 'iloc': @@ -651,7 +651,7 @@ def to_int(): warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format( type(self).__name__),FutureWarning) return key - return self._convert_indexer_error(key, 'label') + return self._invalid_indexer('label', key) if is_float(key): if not self.is_floating(): @@ -667,7 +667,7 @@ def _validate_slicer(self, key, f): for c in ['start','stop','step']: if not f(getattr(key,c)): - self._convert_indexer_error(key.start, 'slice {0} value'.format(c)) + self._invalid_indexer('slice {0} value'.format(c), key.start) def _convert_slice_indexer_getitem(self, key, is_index_slice=False): """ called from the getitem slicers, determine how to treat the key @@ -698,7 +698,7 @@ def f(c): "and not floating point",FutureWarning) return int(v) - self._convert_indexer_error(v, 'slice {0} value'.format(c)) + self._invalid_indexer('slice {0} value'.format(c), v) return slice(*[ f(c) for c in ['start','stop','step']]) @@ -787,11 +787,13 @@ def _convert_list_indexer_for_mixed(self, keyarr, typ=None): return None - def _convert_indexer_error(self, key, msg=None): - if msg is None: - msg = 'label' - raise TypeError("the {0} [{1}] is not a proper indexer for this index " - "type ({2})".format(msg, key, self.__class__.__name__)) + def _invalid_indexer(self, form, key): + """ consistent invalid indexer message """ + raise TypeError("cannot do {form} indexing on {klass} with these " + "indexers [{key}] of {typ}".format(form=form,
+ klass=type(self), + key=key, + typ=type(key))) def get_duplicates(self): from collections import defaultdict @@ -2119,11 +2121,27 @@ def _maybe_cast_slice_bound(self, label, side): label : object side : {'left', 'right'} + Returns + ------- + label : object + Notes ----- Value of `side` parameter should be validated in caller. """ + + # pass thru float indexers if we have a numeric type index + # which then can decide to process / or convert and warng + if is_float(label): + if not self.is_floating(): + self._invalid_indexer('slice',label) + + # we are not an integer based index, and we have an integer label + # treat as positional based slicing semantics + if not self.is_integer() and is_integer(label): + self._invalid_indexer('slice',label) + return label def _searchsorted_monotonic(self, label, side='left'): @@ -2158,10 +2176,12 @@ def get_slice_bound(self, label, side): " must be either 'left' or 'right': %s" % (side,)) original_label = label + # For datetime indices label may be a string that has to be converted # to datetime boundary according to its resolution. label = self._maybe_cast_slice_bound(label, side) + # we need to look up the label try: slc = self.get_loc(label) except KeyError as err: @@ -2654,6 +2674,31 @@ def astype(self, dtype): self.__class__) return Index(self.values, name=self.name, dtype=dtype) + def _maybe_cast_slice_bound(self, label, side): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. 
+ + """ + if not (is_integer(label) or is_float(label)): + self._invalid_indexer('slice',label) + + return label + def _convert_scalar_indexer(self, key, typ=None): if typ == 'iloc': return super(Float64Index, self)._convert_scalar_indexer(key, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1c951f58a17d8..2e953fed2b14f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1243,25 +1243,7 @@ def _has_valid_type(self, key, axis): # boolean if isinstance(key, slice): - - if ax.is_floating(): - - # allowing keys to be slicers with no fallback - pass - - else: - if key.start is not None: - if key.start not in ax: - raise KeyError( - "start bound [%s] is not the [%s]" % - (key.start, self.obj._get_axis_name(axis)) - ) - if key.stop is not None: - if key.stop not in ax: - raise KeyError( - "stop bound [%s] is not in the [%s]" % - (key.stop, self.obj._get_axis_name(axis)) - ) + return True elif is_bool_indexer(key): return True diff --git a/pandas/core/series.py b/pandas/core/series.py index 901faef484377..77a5d61ad6fa0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -536,7 +536,7 @@ def __getitem__(self, key): else: # we can try to coerce the indexer (or this will raise) - new_key = self.index._convert_scalar_indexer(key) + new_key = self.index._convert_scalar_indexer(key,typ='getitem') if type(new_key) != type(key): return self.__getitem__(new_key) raise diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 75c28681ecde5..53732eb8bdecf 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -950,16 +950,30 @@ def test_slice_locs(self): self.assertEqual(idx.slice_locs(start=3), (3, n)) self.assertEqual(idx.slice_locs(3, 8), (3, 6)) self.assertEqual(idx.slice_locs(5, 10), (3, n)) - self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) - self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) self.assertEqual(idx.slice_locs(end=8), (0, 6)) self.assertEqual(idx.slice_locs(end=9), (0, 
7)) + # reversed idx2 = idx[::-1] self.assertEqual(idx2.slice_locs(8, 2), (2, 6)) - self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) self.assertEqual(idx2.slice_locs(7, 3), (2, 5)) - self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + + # float slicing + idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=float)) + n = len(idx) + self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) + self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) + self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + + # int slicing with floats + idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=int)) + self.assertRaises(TypeError, lambda : idx.slice_locs(5.0, 10.0)) + self.assertRaises(TypeError, lambda : idx.slice_locs(4.5, 10.5)) + idx2 = idx[::-1] + self.assertRaises(TypeError, lambda : idx2.slice_locs(8.5, 1.5)) + self.assertRaises(TypeError, lambda : idx2.slice_locs(10.5, -1)) def test_slice_locs_dup(self): idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 68c504b2a35c3..8102ca5e7e146 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -20,7 +20,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_attr_equal) -from pandas import concat +from pandas import concat, lib from pandas.io.common import PerformanceWarning import pandas.util.testing as tm @@ -211,8 +211,6 @@ def _print(result, error = None): except AssertionError: raise - except TypeError: - raise AssertionError(_print('type error')) except Exception as detail: # if we are in fails, the ok, otherwise raise it @@ -608,7 +606,7 @@ def test_iloc_setitem(self): expected = Series([0,1,0],index=[4,5,6]) assert_series_equal(s, expected) - def test_ix_loc_setitem(self): + def test_ix_loc_setitem_consistency(self): # GH 5771 # loc with slice and series @@ -656,6 +654,84 @@ def 
test_ix_loc_setitem(self): df2.ix[:,2] = pd.to_datetime(df['timestamp'], unit='s') assert_frame_equal(df2,expected) + def test_ix_loc_consistency(self): + + # GH 8613 + # some edge cases where ix/loc should return the same + # this is not an exhaustive case + + def compare(result, expected): + if lib.isscalar(expected): + self.assertEqual(result, expected) + else: + self.assertTrue(expected.equals(result)) + + # failure cases for .loc, but these work for .ix + df = pd.DataFrame(np.random.randn(5,4), columns=list('ABCD')) + for key in [ slice(1,3), tuple([slice(0,2),slice(0,2)]), tuple([slice(0,2),df.columns[0:2]]) ]: + + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, tm.makeTimedeltaIndex ]: + df.index = index(len(df.index)) + df.ix[key] + + self.assertRaises(TypeError, lambda : df.loc[key]) + + df = pd.DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=pd.date_range('2012-01-01', periods=5)) + + for key in [ '2012-01-03', + '2012-01-31', + slice('2012-01-03','2012-01-03'), + slice('2012-01-03','2012-01-04'), + slice('2012-01-03','2012-01-06',2), + slice('2012-01-03','2012-01-31'), + tuple([[True,True,True,False,True]]), + ]: + + # getitem + + # if the expected raises, then compare the exceptions + try: + expected = df.ix[key] + except KeyError: + self.assertRaises(KeyError, lambda : df.loc[key]) + continue + + result = df.loc[key] + compare(result, expected) + + # setitem + df1 = df.copy() + df2 = df.copy() + + df1.ix[key] = 10 + df2.loc[key] = 10 + compare(df2, df1) + + # edge cases + s = Series(['a','b','c','d'], [1,2,3,4]) + self.assertRaises(TypeError, lambda : s.ix[1.5:5.5]) + self.assertRaises(TypeError, lambda : s.loc[1.5:5.5]) + + s = Series([1,2,3,4], index=list('abde')) + + result1 = s['a':'c'] + result2 = s.ix['a':'c'] + result3 = s.loc['a':'c'] + assert_series_equal(result1,result2) + assert_series_equal(result1,result3) + + # now work rather than raising KeyError + s = 
Series(range(5),[-2,-1,1,2,3]) + + result1 = s.ix[-10:3] + result2 = s.loc[-10:3] + assert_series_equal(result1,result2) + + result1 = s.ix[0:3] + result2 = s.loc[0:3] + assert_series_equal(result1,result2) + def test_loc_setitem_multiindex(self): # GH7190 @@ -776,7 +852,11 @@ def test_loc_getitem_label(self): def test_loc_getitem_label_out_of_range(self): # out of range label - self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['ints','labels','mixed','ts','floats'], fails=KeyError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['ints','labels','mixed','ts'], fails=KeyError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['floats'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['ints','labels','mixed'], fails=KeyError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['ts'], axes=0, fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['floats'], axes=0, fails=TypeError) def test_loc_getitem_label_list(self): @@ -814,9 +894,6 @@ def test_loc_getitem_bool(self): def test_loc_getitem_int_slice(self): - # int slices in int - self.check_result('int slice1', 'loc', slice(2,4), 'ix', { 0 : [2,4], 1: [3,6], 2: [4,8] }, typs = ['ints'], fails=KeyError) - # ok self.check_result('int slice2', 'loc', slice(2,4), 'ix', [2,4], typs = ['ints'], axes = 0) self.check_result('int slice2', 'loc', slice(3,6), 'ix', [3,6], typs = ['ints'], axes = 1) @@ -920,7 +997,7 @@ def f(): def test_loc_getitem_label_slice(self): # label slices (with ints) - self.check_result('lab slice', 'loc', slice(1,3), 'ix', slice(1,3), typs = ['labels','mixed','ts','floats','empty'], fails=KeyError) + self.check_result('lab slice', 'loc', slice(1,3), 'ix', slice(1,3), typs = ['labels','mixed','empty','ts','floats'], fails=TypeError) # real label slices self.check_result('lab slice', 'loc', slice('a','c'), 'ix', slice('a','c'), typs = ['labels'], axes=0) @@ -931,20 +1008,15 @@ def 
test_loc_getitem_label_slice(self): self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=1, fails=KeyError) self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=2, fails=KeyError) - self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=0, fails=KeyError) + self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=0, fails=TypeError) self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=1, fails=KeyError) self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=2, fails=KeyError) - self.check_result('mixed slice', 'loc', slice(2,4,2), 'ix', slice(2,4,2), typs = ['mixed'], axes=0) + self.check_result('mixed slice', 'loc', slice(2,4,2), 'ix', slice(2,4,2), typs = ['mixed'], axes=0, fails=TypeError) def test_loc_general(self): - # GH 2922 (these are fails) - df = DataFrame(np.random.rand(4,4),columns=['A','B','C','D']) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,2),slice(0,2)])) - df = DataFrame(np.random.rand(4,4),columns=['A','B','C','D'], index=['A','B','C','D']) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,2),df.columns[0:2]])) # want this to work result = df.loc[:,"A":"B"].iloc[0:2,:] @@ -3239,10 +3311,10 @@ def test_partial_set_invalid(self): # don't allow not string inserts def f(): df.loc[100.0, :] = df.ix[0] - self.assertRaises(ValueError, f) + self.assertRaises(TypeError, f) def f(): df.loc[100,:] = df.ix[0] - self.assertRaises(ValueError, f) + self.assertRaises(TypeError, f) def f(): df.ix[100.0, :] = df.ix[0] @@ -3887,8 +3959,8 @@ def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None): check_invalid(index()) check_invalid(Index(np.arange(5) * 2.5),loc=KeyError, ix=KeyError, getitem=KeyError) - def check_getitem(index): 
- + def check_index(index, error): + index = index() s = Series(np.arange(len(index)),index=index) # positional selection @@ -3898,22 +3970,26 @@ def check_getitem(index): result4 = s.iloc[5.0] # by value - self.assertRaises(KeyError, lambda : s.loc[5]) - self.assertRaises(KeyError, lambda : s.loc[5.0]) + self.assertRaises(error, lambda : s.loc[5]) + self.assertRaises(error, lambda : s.loc[5.0]) # this is fallback, so it works result5 = s.ix[5] result6 = s.ix[5.0] + self.assertEqual(result1, result2) self.assertEqual(result1, result3) self.assertEqual(result1, result4) self.assertEqual(result1, result5) self.assertEqual(result1, result6) - # all index types except float/int - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: - check_getitem(index()) + # string-like + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex ]: + check_index(index, KeyError) + + # datetimelike + for index in [ tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: + check_index(index, TypeError) # exact indexing when found on IntIndex s = Series(np.arange(10),dtype='int64') @@ -3943,8 +4019,8 @@ def check_slicing_positional(index): assert_series_equal(result1, result2) assert_series_equal(result1, result3) - # not in the index - self.assertRaises(KeyError, lambda : s.loc[2:5]) + # loc will fail + self.assertRaises(TypeError, lambda : s.loc[2:5]) # make all float slicing fail self.assertRaises(TypeError, lambda : s[2.0:5]) @@ -3955,9 +4031,9 @@ def check_slicing_positional(index): self.assertRaises(TypeError, lambda : s.ix[2.0:5.0]) self.assertRaises(TypeError, lambda : s.ix[2:5.0]) - self.assertRaises(KeyError, lambda : s.loc[2.0:5]) - self.assertRaises(KeyError, lambda : s.loc[2.0:5.0]) - self.assertRaises(KeyError, lambda : s.loc[2:5.0]) + self.assertRaises(TypeError, lambda : s.loc[2.0:5]) + self.assertRaises(TypeError, lambda : s.loc[2.0:5.0]) + self.assertRaises(TypeError, lambda : s.loc[2:5.0]) # these work for now 
#self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) @@ -3966,7 +4042,7 @@ def check_slicing_positional(index): # all index types except int, float for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: + tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: check_slicing_positional(index()) # int @@ -3981,15 +4057,12 @@ def check_slicing_positional(index): # these are all value based result2 = s.ix[2:5] result3 = s.loc[2:5] - result4 = s.loc[2.0:5] - result5 = s.loc[2.0:5.0] - result6 = s.loc[2:5.0] assert_series_equal(result2, result3) - assert_series_equal(result2, result4) - assert_series_equal(result2, result5) - assert_series_equal(result2, result6) # make all float slicing fail + self.assertRaises(TypeError, lambda : s.loc[2.0:5]) + self.assertRaises(TypeError, lambda : s.loc[2.0:5.0]) + self.assertRaises(TypeError, lambda : s.loc[2:5.0]) self.assertRaises(TypeError, lambda : s[2.0:5]) self.assertRaises(TypeError, lambda : s[2.0:5.0]) self.assertRaises(TypeError, lambda : s[2:5.0]) @@ -4097,9 +4170,7 @@ def test_deprecate_float_indexers(self): import warnings warnings.filterwarnings(action='error', category=FutureWarning) - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: - + def check_index(index): i = index(5) for s in [ Series(np.arange(len(i)),index=i), DataFrame(np.random.randn(len(i),len(i)),index=i,columns=i) ]: @@ -4114,8 +4185,11 @@ def f(): # fallsback to position selection ,series only s = Series(np.arange(len(i)),index=i) s[3] - self.assertRaises(FutureWarning, lambda : - s[3.0]) + self.assertRaises(FutureWarning, lambda : s[3.0]) + + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: + check_index(index) # ints i = index(5) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index a1904d38ab530..9664cef52696d 100644 --- a/pandas/tseries/base.py +++ 
b/pandas/tseries/base.py @@ -8,6 +8,7 @@ from pandas import compat import numpy as np from pandas.core import common as com +from pandas.core.common import is_integer, is_float import pandas.tslib as tslib import pandas.lib as lib from pandas.core.index import Index @@ -297,6 +298,13 @@ def resolution(self): from pandas.tseries.frequencies import get_reso_string return get_reso_string(self._resolution) + def _convert_scalar_indexer(self, key, typ=None): + """ we don't allow integer or float indexing on datetime-like when using loc """ + if typ in ['loc'] and lib.isscalar(key) and (is_integer(key) or is_float(key)): + self._invalid_indexer('index',key) + + return super(DatetimeIndexOpsMixin, self)._convert_scalar_indexer(key, typ=typ) + def _add_datelike(self, other): raise NotImplementedError diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 60281b6b875b9..f733a7183c9bc 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -10,7 +10,7 @@ from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE, _values_from_object, _maybe_box, - ABCSeries) + ABCSeries, is_integer, is_float) from pandas.core.index import Index, Int64Index, Float64Index import pandas.compat as compat from pandas.compat import u @@ -215,9 +215,9 @@ def __new__(cls, data=None, freq = None if periods is not None: - if com.is_float(periods): + if is_float(periods): periods = int(periods) - elif not com.is_integer(periods): + elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) @@ -1271,15 +1271,17 @@ def _maybe_cast_slice_bound(self, label, side): label : object side : {'left', 'right'} + Returns + ------- + label : object + Notes ----- Value of `side` parameter should be validated in caller. 
""" - if isinstance(label, float): - raise TypeError('Cannot index datetime64 with float keys') - if isinstance(label, time): - raise KeyError('Cannot index datetime64 with time keys') + if is_float(label) or isinstance(label, time) or is_integer(label): + self._invalid_indexer('slice',label) if isinstance(label, compat.string_types): freq = getattr(self, 'freqstr', @@ -1556,7 +1558,7 @@ def delete(self, loc): new_dates = np.delete(self.asi8, loc) freq = None - if lib.is_integer(loc): + if is_integer(loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 074ed720991ce..95bbf3825e0da 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -22,7 +22,8 @@ import pandas.core.common as com from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box, - _values_from_object, ABCSeries) + _values_from_object, ABCSeries, + is_integer, is_float) from pandas import compat from pandas.lib import Timestamp, Timedelta import pandas.lib as lib @@ -166,9 +167,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, freq = frequencies.get_standard_freq(freq) if periods is not None: - if com.is_float(periods): + if is_float(periods): periods = int(periods) - elif not com.is_integer(periods): + elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) @@ -533,7 +534,7 @@ def get_loc(self, key, method=None): try: return self._engine.get_loc(key) except KeyError: - if com.is_integer(key): + if is_integer(key): raise try: @@ -576,6 +577,8 @@ def _maybe_cast_slice_bound(self, label, side): return bounds[0 if side == 'left' else 1] except Exception: raise KeyError(label) + elif is_integer(label) or is_float(label): + self._invalid_indexer('slice',label) return label diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 897a28e8f5ea9..4e86bf9d92f48 100644 --- a/pandas/tseries/tdi.py +++ 
b/pandas/tseries/tdi.py @@ -7,7 +7,7 @@ from pandas.core.common import (ABCSeries, _TD_DTYPE, _INT64_DTYPE, is_timedelta64_dtype, _maybe_box, - _values_from_object, isnull) + _values_from_object, isnull, is_integer, is_float) from pandas.core.index import Index, Int64Index import pandas.compat as compat from pandas.compat import u @@ -156,9 +156,9 @@ def __new__(cls, data=None, unit=None, freq = None if periods is not None: - if com.is_float(periods): + if is_float(periods): periods = int(periods) - elif not com.is_integer(periods): + elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) @@ -687,7 +687,7 @@ def _maybe_cast_slice_bound(self, label, side): Returns ------- - bound : Timedelta or object + label : object """ if isinstance(label, compat.string_types): @@ -698,12 +698,16 @@ def _maybe_cast_slice_bound(self, label, side): else: return (lbound + _resolution_map[parsed.resolution]() - Timedelta(1, 'ns')) + elif is_integer(label) or is_float(label): + self._invalid_indexer('slice',label) + return label def _get_string_slice(self, key, use_lhs=True, use_rhs=True): freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None)) - + if is_integer(key) or is_float(key): + self._invalid_indexer('slice',key) loc = self._partial_td_slice(key, freq, use_lhs=use_lhs, use_rhs=use_rhs) return loc @@ -866,7 +870,7 @@ def delete(self, loc): new_tds = np.delete(self.asi8, loc) freq = 'infer' - if lib.is_integer(loc): + if is_integer(loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: From dd1ff3eb11ec3df22bd2497222fde289576e58ce Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 1 Mar 2015 18:05:41 -0500 Subject: [PATCH 2/5] DOC: reorg / update v0.16.0.txt docs --- doc/source/whatsnew/v0.16.0.txt | 171 ++++++++++++++++---------------- 1 file changed, 86 insertions(+), 85 deletions(-) diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 1fcb60c866d07..03b0b55b8e3b2 
100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -20,6 +20,8 @@ users upgrade to this version. New features ~~~~~~~~~~~~ +.. _whatsnew_0160.enhancements: + - Reindex now supports ``method='nearest'`` for frames or series with a monotonic increasing or decreasing index (:issue:`9258`): .. ipython:: python @@ -29,7 +31,41 @@ New features This method is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods. -- DataFrame assign method +- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`) +- Added time interval selection in ``get_data_yahoo`` (:issue:`9071`) +- Added ``Series.str.slice_replace()``, which previously raised ``NotImplementedError`` (:issue:`8888`) +- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`) +- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`) +- Lag parameter was added to the autocorrelation method of ``Series``, defaults to lag-1 autocorrelation (:issue:`9192`) +- ``Timedelta`` will now accept ``nanoseconds`` keyword in constructor (:issue:`9273`) +- SQL code now safely escapes table and column names (:issue:`8986`) + +- Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` (:issue:`9322`) +- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``, + ``isupper()``, ``istitle()`` which behave as the same as standard ``str`` (:issue:`9282`) + +- Added ``StringMethods.find()`` and ``rfind()`` which behave as the same as standard ``str`` (:issue:`9386`) + +- ``Index.get_indexer`` now supports ``method='pad'`` and ``method='backfill'`` even for any target array, not just monotonic targets. These methods also work for monotonic decreasing as well as monotonic increasing indexes (:issue:`9258`). +- ``Index.asof`` now works on all index types (:issue:`9258`). 
+ +- Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave as the same as standard ``str`` (:issue:`9439`) +- The ``read_excel()`` function's :ref:`sheetname <_io.specifying_sheets>` argument now accepts a list and ``None``, to get multiple or all sheets respectively. If more than one sheet is specified, a dictionary is returned. (:issue:`9450`) + + .. code-block:: python + + # Returns the 1st and 4th sheet, as a dictionary of DataFrames. + pd.read_excel('path_to_file.xls',sheetname=['Sheet1',3]) + +- A ``verbose`` argument has been augmented in ``io.read_excel()``, defaults to False. Set to True to print sheet names as they are parsed. (:issue:`9450`) +- Added ``StringMethods.ljust()`` and ``rjust()`` which behave as the same as standard ``str`` (:issue:`9352`) +- ``StringMethods.pad()`` and ``center()`` now accept ``fillchar`` option to specify filling character (:issue:`9352`) +- Added ``StringMethods.zfill()`` which behave as the same as standard ``str`` (:issue:`9387`) + +DataFrame Assign +~~~~~~~~~~~~~~~~ + +.. _whatsnew_0160.enhancements.assign: Inspired by `dplyr's `__ ``mutate`` verb, DataFrame has a new @@ -71,6 +107,55 @@ calculate the ratio, and plot See the :ref:`documentation ` for more. (:issue:`9229`) + +Interaction with scipy.sparse +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0160.enhancements.sparse: + +Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels: + +.. 
ipython:: python + + from numpy import nan + s = Series([3.0, nan, 1.0, 3.0, nan, nan]) + s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + + s + + # SparseSeries + ss = s.to_sparse() + ss + + A, rows, columns = ss.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=False) + + A + A.todense() + rows + columns + +The from_coo method is a convenience method for creating a ``SparseSeries`` +from a ``scipy.sparse.coo_matrix``: + +.. ipython:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + A + A.todense() + + ss = SparseSeries.from_coo(A) + ss + .. _whatsnew_0160.api: .. _whatsnew_0160.api_breaking: @@ -278,90 +363,6 @@ Deprecations .. _whatsnew_0160.deprecations: -Enhancements -~~~~~~~~~~~~ - -.. _whatsnew_0160.enhancements: - -- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`) -- Added time interval selection in ``get_data_yahoo`` (:issue:`9071`) -- Added ``Series.str.slice_replace()``, which previously raised ``NotImplementedError`` (:issue:`8888`) -- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`) -- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`) -- Lag parameter was added to the autocorrelation method of ``Series``, defaults to lag-1 autocorrelation (:issue:`9192`) -- ``Timedelta`` will now accept ``nanoseconds`` keyword in constructor (:issue:`9273`) -- SQL code now safely escapes table and column names (:issue:`8986`) - -- Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` (:issue:`9322`) -- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``, - ``isupper()``, ``istitle()`` which behave as the same as standard ``str`` (:issue:`9282`) - -- Added 
``StringMethods.find()`` and ``rfind()`` which behave as the same as standard ``str`` (:issue:`9386`) - -- ``Index.get_indexer`` now supports ``method='pad'`` and ``method='backfill'`` even for any target array, not just monotonic targets. These methods also work for monotonic decreasing as well as monotonic increasing indexes (:issue:`9258`). -- ``Index.asof`` now works on all index types (:issue:`9258`). - -- Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave as the same as standard ``str`` (:issue:`9439`) -- The ``read_excel()`` function's :ref:`sheetname <_io.specifying_sheets>` argument now accepts a list and ``None``, to get multiple or all sheets respectively. If more than one sheet is specified, a dictionary is returned. (:issue:`9450`) - - .. code-block:: python - - # Returns the 1st and 4th sheet, as a dictionary of DataFrames. - pd.read_excel('path_to_file.xls',sheetname=['Sheet1',3]) - -- A ``verbose`` argument has been augmented in ``io.read_excel()``, defaults to False. Set to True to print sheet names as they are parsed. (:issue:`9450`) -- Added ``StringMethods.ljust()`` and ``rjust()`` which behave as the same as standard ``str`` (:issue:`9352`) -- ``StringMethods.pad()`` and ``center()`` now accept ``fillchar`` option to specify filling character (:issue:`9352`) -- Added ``StringMethods.zfill()`` which behave as the same as standard ``str`` (:issue:`9387`) - -Interaction with scipy.sparse -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. _whatsnew_0160.enhancements.sparse: - -Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels: - -.. 
ipython:: python - - from numpy import nan - s = Series([3.0, nan, 1.0, 3.0, nan, nan]) - s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), - (1, 2, 'a', 1), - (1, 1, 'b', 0), - (1, 1, 'b', 1), - (2, 1, 'b', 0), - (2, 1, 'b', 1)], - names=['A', 'B', 'C', 'D']) - - s - - # SparseSeries - ss = s.to_sparse() - ss - - A, rows, columns = ss.to_coo(row_levels=['A', 'B'], - column_levels=['C', 'D'], - sort_labels=False) - - A - A.todense() - rows - columns - -The from_coo method is a convenience method for creating a ``SparseSeries`` -from a ``scipy.sparse.coo_matrix``: - -.. ipython:: python - - from scipy import sparse - A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), - shape=(3, 4)) - A - A.todense() - - ss = SparseSeries.from_coo(A) - ss - Performance ~~~~~~~~~~~ From a5801f6ba286111681e597dc6d171411e01a9d82 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 1 Mar 2015 18:40:29 -0500 Subject: [PATCH 3/5] API: enable float slicing with .loc on int index --- pandas/core/index.py | 2 +- pandas/tests/test_index.py | 8 +-- pandas/tests/test_indexing.py | 97 +++++++++++++++++++++-------------- 3 files changed, 64 insertions(+), 43 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 94fe13124dbd6..020ba5cb260b1 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2134,7 +2134,7 @@ def _maybe_cast_slice_bound(self, label, side): # pass thru float indexers if we have a numeric type index # which then can decide to process / or convert and warng if is_float(label): - if not self.is_floating(): + if not (self.is_integer() or self.is_floating()): self._invalid_indexer('slice',label) # we are not an integer based index, and we have an integer label diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 53732eb8bdecf..ef05209ebe54c 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -969,11 +969,11 @@ def test_slice_locs(self): # int slicing with floats idx = Index(np.array([0, 1, 2, 
5, 6, 7, 9, 10], dtype=int)) - self.assertRaises(TypeError, lambda : idx.slice_locs(5.0, 10.0)) - self.assertRaises(TypeError, lambda : idx.slice_locs(4.5, 10.5)) + self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) + self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) idx2 = idx[::-1] - self.assertRaises(TypeError, lambda : idx2.slice_locs(8.5, 1.5)) - self.assertRaises(TypeError, lambda : idx2.slice_locs(10.5, -1)) + self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) + self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) def test_slice_locs_dup(self): idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 8102ca5e7e146..847506b9ef896 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -709,10 +709,6 @@ def compare(result, expected): compare(df2, df1) # edge cases - s = Series(['a','b','c','d'], [1,2,3,4]) - self.assertRaises(TypeError, lambda : s.ix[1.5:5.5]) - self.assertRaises(TypeError, lambda : s.loc[1.5:5.5]) - s = Series([1,2,3,4], index=list('abde')) result1 = s['a':'c'] @@ -4047,7 +4043,7 @@ def check_slicing_positional(index): # int index = tm.makeIntIndex() - s = Series(np.arange(len(index))+10,index) + s = Series(np.arange(len(index))+10,index+5) # this is positional result1 = s[2:5] @@ -4059,55 +4055,80 @@ def check_slicing_positional(index): result3 = s.loc[2:5] assert_series_equal(result2, result3) - # make all float slicing fail - self.assertRaises(TypeError, lambda : s.loc[2.0:5]) - self.assertRaises(TypeError, lambda : s.loc[2.0:5.0]) - self.assertRaises(TypeError, lambda : s.loc[2:5.0]) - self.assertRaises(TypeError, lambda : s[2.0:5]) - self.assertRaises(TypeError, lambda : s[2.0:5.0]) - self.assertRaises(TypeError, lambda : s[2:5.0]) + # float slicers on an int index + expected = Series([11,12,13],index=[6,7,8]) + result = s.loc[6.0:8.5] + assert_series_equal(result, expected) + + result = s.loc[5.5:8.5] + assert_series_equal(result, expected) 
+ + result = s.loc[5.5:8.0] + assert_series_equal(result, expected) - self.assertRaises(TypeError, lambda : s.ix[2.0:5]) - self.assertRaises(TypeError, lambda : s.ix[2.0:5.0]) - self.assertRaises(TypeError, lambda : s.ix[2:5.0]) + # make all float slicing fail for ix/[] with an int index + self.assertRaises(TypeError, lambda : s[6.0:8]) + self.assertRaises(TypeError, lambda : s[6.0:8.0]) + self.assertRaises(TypeError, lambda : s[6:8.0]) + self.assertRaises(TypeError, lambda : s.ix[6.0:8]) + self.assertRaises(TypeError, lambda : s.ix[6.0:8.0]) + self.assertRaises(TypeError, lambda : s.ix[6:8.0]) # these work for now - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) - #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + #self.assertRaises(TypeError, lambda : s.iloc[6.0:8]) + #self.assertRaises(TypeError, lambda : s.iloc[6.0:8.0]) + #self.assertRaises(TypeError, lambda : s.iloc[6:8.0]) # float index = tm.makeFloatIndex() - s = Series(np.arange(len(index))+10,index=index) + s = Series(np.arange(len(index))+10,index=index+5) # these are all value based - result1 = s[2:5] - result2 = s.ix[2:5] - result3 = s.loc[2:5] + result1 = s[6:8] + result2 = s.ix[6:8] + result3 = s.loc[6:8] assert_series_equal(result1, result2) assert_series_equal(result1, result3) # these are all valid - result1a = s[2.0:5] - result2a = s[2.0:5.0] - result3a = s[2:5.0] + result1a = s[6.0:8] + result2a = s[6.0:8.0] + result3a = s[6:8.0] + result1b = s[6.5:8] + result2b = s[6.5:8.5] + result3b = s[6:8.5] assert_series_equal(result1a, result2a) assert_series_equal(result1a, result3a) - - result1b = s.ix[2.0:5] - result2b = s.ix[2.0:5.0] - result3b = s.ix[2:5.0] - assert_series_equal(result1b, result2b) - assert_series_equal(result1b, result3b) - - result1c = s.loc[2.0:5] - result2c = s.loc[2.0:5.0] - result3c = s.loc[2:5.0] - assert_series_equal(result1c, result2c) - assert_series_equal(result1c, result3c) - 
assert_series_equal(result1a, result1b) + assert_series_equal(result1a, result2b) + assert_series_equal(result1a, result3b) + + result1c = s.ix[6.0:8] + result2c = s.ix[6.0:8.0] + result3c = s.ix[6:8.0] + result1d = s.ix[6.5:8] + result2d = s.ix[6.5:8.5] + result3d = s.ix[6:8.5] assert_series_equal(result1a, result1c) + assert_series_equal(result1a, result2c) + assert_series_equal(result1a, result3c) + assert_series_equal(result1a, result1d) + assert_series_equal(result1a, result2d) + assert_series_equal(result1a, result3d) + + result1e = s.loc[6.0:8] + result2e = s.loc[6.0:8.0] + result3e = s.loc[6:8.0] + result1f = s.loc[6.5:8] + result2f = s.loc[6.5:8.5] + result3f = s.loc[6:8.5] + assert_series_equal(result1a, result1e) + assert_series_equal(result1a, result2e) + assert_series_equal(result1a, result3e) + assert_series_equal(result1a, result1f) + assert_series_equal(result1a, result2f) + assert_series_equal(result1a, result3f) + # these work for now #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) From 560fa8718f2a47d7c94525751aa88156d61d0aa8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 2 Mar 2015 06:06:30 -0500 Subject: [PATCH 4/5] API: enable float slicing with .ix --- doc/source/indexing.rst | 10 +-- doc/source/whatsnew/v0.16.0.txt | 26 +++++--- pandas/core/index.py | 5 +- pandas/tests/test_indexing.py | 110 +++++++++++++------------------- 4 files changed, 65 insertions(+), 86 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 846285ef42969..5079b4fa8ad6f 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -295,26 +295,18 @@ Selection By Label .. warning:: ``.loc`` is strict when you present slicers that are not compatible (or convertible) with the index type. For example - using integers in a ``DatetimeIndex`` or float indexers in an ``Int64Index``. These will raise a ``TypeError``. + using integers in a ``DatetimeIndex``. These will raise a ``TypeError``. .. 
ipython:: python dfl = DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=date_range('20130101',periods=5)) dfl - sl = Series(range(5),[-2,-1,1,2,3]) - sl .. code-block:: python In [4]: dfl.loc[2:3] TypeError: cannot do slice indexing on with these indexers [2] of - .. code-block:: python - - In [8]: sl.loc[-1.0:2] - TypeError: cannot do slice indexing on with these indexers [-1.0] of - - String likes in slicing *can* be convertible to the type of the index and lead to natural slicing. .. ipython:: python diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 03b0b55b8e3b2..9cb474a53f25a 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -322,13 +322,6 @@ The behavior of a small sub-set of edge cases for using ``.loc`` have changed (: In [6]: s.loc[-10:3] KeyError: 'start bound [-10] is not the [index]' - In [8]: s.loc[-1.0:2] - Out[2]: - -1 1 - 1 2 - 2 3 - dtype: int64 - New Behavior .. ipython:: python @@ -336,10 +329,25 @@ The behavior of a small sub-set of edge cases for using ``.loc`` have changed (: df.loc['2013-01-02':'2013-01-10'] s.loc[-10:3] +- allow slicing with float-like values on an integer index for ``.ix``. Previously this was only enabled for ``.loc``: + .. code-block:: python - In [8]: s.loc[-1.0:2] - TypeError: cannot do slice indexing on with these indexers [-1.0] of + Previous Behavior + + In [8]: s.ix[-1.0:2] + TypeError: the slice start value [-1.0] is not a proper indexer for this index type (Int64Index) + + New Behavior + + .. ipython:: python + + In [8]: s.ix[-1.0:2] + Out[2]: + -1 1 + 1 2 + 2 3 + dtype: int64 - provide a useful exception for indexing with an invalid type for that index when using ``.loc``. For example trying to use ``.loc`` on an index of type ``DatetimeIndex`` or ``PeriodIndex`` or ``TimedeltaIndex``, with an integer (or a float). 
diff --git a/pandas/core/index.py b/pandas/core/index.py index 020ba5cb260b1..298f0053251b7 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -707,8 +707,11 @@ def validate(v): if v is None or is_integer(v): return True - # dissallow floats + # dissallow floats (except for .ix) elif is_float(v): + if typ == 'ix': + return True + return False return True diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 847506b9ef896..b8650683a004a 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -4004,6 +4004,12 @@ def check_index(index, error): def test_slice_indexer(self): + def check_iloc_compat(s): + # invalid type for iloc (but works with a warning) + self.assert_produces_warning(FutureWarning, lambda : s.iloc[6.0:8]) + self.assert_produces_warning(FutureWarning, lambda : s.iloc[6.0:8.0]) + self.assert_produces_warning(FutureWarning, lambda : s.iloc[6:8.0]) + def check_slicing_positional(index): s = Series(np.arange(len(index))+10,index=index) @@ -4031,17 +4037,16 @@ def check_slicing_positional(index): self.assertRaises(TypeError, lambda : s.loc[2.0:5.0]) self.assertRaises(TypeError, lambda : s.loc[2:5.0]) - # these work for now - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) - #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + check_iloc_compat(s) # all index types except int, float for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: check_slicing_positional(index()) - # int + ############ + # IntIndex # + ############ index = tm.makeIntIndex() s = Series(np.arange(len(index))+10,index+5) @@ -4050,38 +4055,34 @@ def check_slicing_positional(index): result4 = s.iloc[2:5] assert_series_equal(result1, result4) - # these are all value based + # these are all label based result2 = s.ix[2:5] result3 = s.loc[2:5] assert_series_equal(result2, result3) # float slicers on 
an int index expected = Series([11,12,13],index=[6,7,8]) - result = s.loc[6.0:8.5] - assert_series_equal(result, expected) + for method in [lambda x: x.loc, lambda x: x.ix]: + result = method(s)[6.0:8.5] + assert_series_equal(result, expected) - result = s.loc[5.5:8.5] - assert_series_equal(result, expected) + result = method(s)[5.5:8.5] + assert_series_equal(result, expected) - result = s.loc[5.5:8.0] - assert_series_equal(result, expected) + result = method(s)[5.5:8.0] + assert_series_equal(result, expected) - # make all float slicing fail for ix/[] with an int index + # make all float slicing fail for [] with an int index self.assertRaises(TypeError, lambda : s[6.0:8]) self.assertRaises(TypeError, lambda : s[6.0:8.0]) self.assertRaises(TypeError, lambda : s[6:8.0]) - self.assertRaises(TypeError, lambda : s.ix[6.0:8]) - self.assertRaises(TypeError, lambda : s.ix[6.0:8.0]) - self.assertRaises(TypeError, lambda : s.ix[6:8.0]) - # these work for now - #self.assertRaises(TypeError, lambda : s.iloc[6.0:8]) - #self.assertRaises(TypeError, lambda : s.iloc[6.0:8.0]) - #self.assertRaises(TypeError, lambda : s.iloc[6:8.0]) + check_iloc_compat(s) - # float - index = tm.makeFloatIndex() - s = Series(np.arange(len(index))+10,index=index+5) + ############## + # FloatIndex # + ############## + s.index = s.index.astype('float64') # these are all value based result1 = s[6:8] @@ -4090,50 +4091,25 @@ def check_slicing_positional(index): assert_series_equal(result1, result2) assert_series_equal(result1, result3) - # these are all valid - result1a = s[6.0:8] - result2a = s[6.0:8.0] - result3a = s[6:8.0] - result1b = s[6.5:8] - result2b = s[6.5:8.5] - result3b = s[6:8.5] - assert_series_equal(result1a, result2a) - assert_series_equal(result1a, result3a) - assert_series_equal(result1a, result1b) - assert_series_equal(result1a, result2b) - assert_series_equal(result1a, result3b) - - result1c = s.ix[6.0:8] - result2c = s.ix[6.0:8.0] - result3c = s.ix[6:8.0] - result1d = s.ix[6.5:8] - 
result2d = s.ix[6.5:8.5] - result3d = s.ix[6:8.5] - assert_series_equal(result1a, result1c) - assert_series_equal(result1a, result2c) - assert_series_equal(result1a, result3c) - assert_series_equal(result1a, result1d) - assert_series_equal(result1a, result2d) - assert_series_equal(result1a, result3d) - - result1e = s.loc[6.0:8] - result2e = s.loc[6.0:8.0] - result3e = s.loc[6:8.0] - result1f = s.loc[6.5:8] - result2f = s.loc[6.5:8.5] - result3f = s.loc[6:8.5] - assert_series_equal(result1a, result1e) - assert_series_equal(result1a, result2e) - assert_series_equal(result1a, result3e) - assert_series_equal(result1a, result1f) - assert_series_equal(result1a, result2f) - assert_series_equal(result1a, result3f) - - - # these work for now - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) - #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + # these are valid for all methods + # these are treated like labels (e.g. the rhs IS included) + def compare(slicers, expected): + for method in [lambda x: x, lambda x: x.loc, lambda x: x.ix ]: + for slices in slicers: + + result = method(s)[slices] + assert_series_equal(result, expected) + + compare([slice(6.0,8),slice(6.0,8.0),slice(6,8.0)], + s[(s.index>=6.0)&(s.index<=8)]) + compare([slice(6.5,8),slice(6.5,8.5)], + s[(s.index>=6.5)&(s.index<=8.5)]) + compare([slice(6,8.5)], + s[(s.index>=6.0)&(s.index<=8.5)]) + compare([slice(6.5,6.5)], + s[(s.index>=6.5)&(s.index<=6.5)]) + + check_iloc_compat(s) def test_set_ix_out_of_bounds_axis_0(self): df = pd.DataFrame(randn(2, 5), index=["row%s" % i for i in range(2)], columns=["col%s" % i for i in range(5)]) From 9207145815ca3c50ea3db19eebcaeaa04c9e6bd2 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 3 Mar 2015 06:28:27 -0500 Subject: [PATCH 5/5] clean up semantics on slice_indexer a bit rename typ -> kind for _convert_*_indexer add kind argument to _maybe_cast_slice_bound cleaned up _convert_slice_indexer a bit --- 
pandas/core/generic.py | 4 +- pandas/core/index.py | 179 +++++++++++++++++++--------------- pandas/core/indexing.py | 20 ++-- pandas/core/series.py | 12 +-- pandas/sparse/frame.py | 2 +- pandas/sparse/panel.py | 6 +- pandas/tests/test_indexing.py | 4 +- pandas/tseries/base.py | 16 ++- pandas/tseries/index.py | 9 +- pandas/tseries/period.py | 3 +- pandas/tseries/tdi.py | 3 +- 11 files changed, 147 insertions(+), 111 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 18500fd05b5f8..9e4e79f3d70cb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1159,11 +1159,11 @@ def _clear_item_cache(self, i=None): else: self._item_cache.clear() - def _slice(self, slobj, axis=0, typ=None): + def _slice(self, slobj, axis=0, kind=None): """ Construct a slice of this container. - typ parameter is maintained for compatibility with Series slicing. + kind parameter is maintained for compatibility with Series slicing. """ axis = self._get_block_manager_axis(axis) diff --git a/pandas/core/index.py b/pandas/core/index.py index 298f0053251b7..10dcdc5a7185a 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -632,8 +632,16 @@ def is_mixed(self): def holds_integer(self): return self.inferred_type in ['integer', 'mixed-integer'] - def _convert_scalar_indexer(self, key, typ=None): - """ convert a scalar indexer, right now we are converting + def _convert_scalar_indexer(self, key, kind=None): + """ + convert a scalar indexer + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + + right now we are converting floats -> ints if the index supports it """ @@ -643,7 +651,7 @@ def to_int(): return self._invalid_indexer('label', key) return ikey - if typ == 'iloc': + if kind == 'iloc': if is_integer(key): return key elif is_float(key): @@ -661,14 +669,6 @@ def to_int(): return key - def _validate_slicer(self, key, f): - """ validate and raise if needed on a slice 
indexers according to the - passed in function """ - - for c in ['start','stop','step']: - if not f(getattr(key,c)): - self._invalid_indexer('slice {0} value'.format(c), key.start) - def _convert_slice_indexer_getitem(self, key, is_index_slice=False): """ called from the getitem slicers, determine how to treat the key whether positional or not """ @@ -676,15 +676,22 @@ def _convert_slice_indexer_getitem(self, key, is_index_slice=False): return key return self._convert_slice_indexer(key) - def _convert_slice_indexer(self, key, typ=None): - """ convert a slice indexer. disallow floats in the start/stop/step """ + def _convert_slice_indexer(self, key, kind=None): + """ + convert a slice indexer. disallow floats in the start/stop/step + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + """ # if we are not a slice, then we are done if not isinstance(key, slice): return key # validate iloc - if typ == 'iloc': + if kind == 'iloc': # need to coerce to_int if needed def f(c): @@ -709,13 +716,16 @@ def validate(v): # dissallow floats (except for .ix) elif is_float(v): - if typ == 'ix': + if kind == 'ix': return True return False return True - self._validate_slicer(key, validate) + for c in ['start','stop','step']: + v = getattr(key,c) + if not validate(v): + self._invalid_indexer('slice {0} value'.format(c), v) # figure out if this is a positional indexer start, stop, step = key.start, key.stop, key.step @@ -727,7 +737,7 @@ def is_int(v): is_index_slice = is_int(start) and is_int(stop) is_positional = is_index_slice and not self.is_integer() - if typ == 'getitem': + if kind == 'getitem': return self._convert_slice_indexer_getitem( key, is_index_slice=is_index_slice) @@ -763,16 +773,16 @@ def is_int(v): return indexer - def _convert_list_indexer(self, key, typ=None): + def _convert_list_indexer(self, key, kind=None): """ convert a list indexer. 
these should be locations """ return key - def _convert_list_indexer_for_mixed(self, keyarr, typ=None): + def _convert_list_indexer_for_mixed(self, keyarr, kind=None): """ passed a key that is tuplesafe that is integer based and we have a mixed index (e.g. number/labels). figure out the indexer. return None if we can't help """ - if (typ is None or typ in ['iloc','ix']) and (is_integer_dtype(keyarr) and not self.is_floating()): + if (kind is None or kind in ['iloc','ix']) and (is_integer_dtype(keyarr) and not self.is_floating()): if self.inferred_type != 'integer': keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr) @@ -793,10 +803,10 @@ def _convert_list_indexer_for_mixed(self, keyarr, typ=None): def _invalid_indexer(self, form, key): """ consistent invalid indexer message """ raise TypeError("cannot do {form} indexing on {klass} with these " - "indexers [{key}] of {typ}".format(form=form, + "indexers [{key}] of {kind}".format(form=form, klass=type(self), key=key, - typ=type(key))) + kind=type(key))) def get_duplicates(self): from collections import defaultdict @@ -844,8 +854,8 @@ def inferred_type(self): """ return a string of the type inferred from the values """ return lib.infer_dtype(self) - def is_type_compatible(self, typ): - return typ == self.inferred_type + def is_type_compatible(self, kind): + return kind == self.inferred_type @cache_readonly def is_all_dates(self): @@ -2082,7 +2092,7 @@ def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None return Index(joined, name=name) - def slice_indexer(self, start=None, end=None, step=None): + def slice_indexer(self, start=None, end=None, step=None, kind=None): """ For an ordered Index, compute the slice indexer for input labels and step @@ -2094,6 +2104,7 @@ def slice_indexer(self, start=None, end=None, step=None): end : label, default None If None, defaults to the end step : int, default None + kind : string, default None Returns ------- @@ -2103,7 +2114,7 @@ def 
slice_indexer(self, start=None, end=None, step=None): ----- This function assumes that the data is sorted, so use at your own peril """ - start_slice, end_slice = self.slice_locs(start, end, step=step) + start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) # return a slice if not lib.isscalar(start_slice): @@ -2113,7 +2124,7 @@ def slice_indexer(self, start=None, end=None, step=None): return slice(start_slice, end_slice, step) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ This function should be overloaded in subclasses that allow non-trivial casting on label-slice bounds, e.g. datetime-like indices allowing @@ -2123,6 +2134,7 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None Returns ------- @@ -2134,15 +2146,16 @@ def _maybe_cast_slice_bound(self, label, side): """ - # pass thru float indexers if we have a numeric type index - # which then can decide to process / or convert and warng + # We are a plain index here (sub-class override this method if they + # wish to have special treatment for floats/ints, e.g. 
Float64Index and + # datetimelike Indexes + # reject them if is_float(label): - if not (self.is_integer() or self.is_floating()): - self._invalid_indexer('slice',label) + self._invalid_indexer('slice',label) - # we are not an integer based index, and we have an integer label - # treat as positional based slicing semantics - if not self.is_integer() and is_integer(label): + # we are trying to find integer bounds on a non-integer based index + # this is rejected (generally .loc gets you here) + elif is_integer(label): self._invalid_indexer('slice',label) return label @@ -2160,7 +2173,7 @@ def _searchsorted_monotonic(self, label, side='left'): raise ValueError('index must be monotonic increasing or decreasing') - def get_slice_bound(self, label, side): + def get_slice_bound(self, label, side, kind): """ Calculate slice bound that corresponds to given label. @@ -2171,6 +2184,7 @@ def get_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None, the type of indexer """ if side not in ('left', 'right'): @@ -2182,7 +2196,7 @@ def get_slice_bound(self, label, side): # For datetime indices label may be a string that has to be converted # to datetime boundary according to its resolution. - label = self._maybe_cast_slice_bound(label, side) + label = self._maybe_cast_slice_bound(label, side, kind) # we need to look up the label try: @@ -2217,7 +2231,7 @@ def get_slice_bound(self, label, side): else: return slc - def slice_locs(self, start=None, end=None, step=None): + def slice_locs(self, start=None, end=None, step=None, kind=None): """ Compute slice locations for input labels. 
@@ -2227,6 +2241,9 @@ def slice_locs(self, start=None, end=None, step=None): If None, defaults to the beginning end : label, default None If None, defaults to the end + step : int, defaults None + If None, defaults to 1 + kind : string, defaults None Returns ------- @@ -2241,13 +2258,13 @@ def slice_locs(self, start=None, end=None, step=None): start_slice = None if start is not None: - start_slice = self.get_slice_bound(start, 'left') + start_slice = self.get_slice_bound(start, 'left', kind) if start_slice is None: start_slice = 0 end_slice = None if end is not None: - end_slice = self.get_slice_bound(end, 'right') + end_slice = self.get_slice_bound(end, 'right', kind) if end_slice is None: end_slice = len(self) @@ -2504,6 +2521,35 @@ class NumericIndex(Index): """ _is_numeric_dtype = True + def _maybe_cast_slice_bound(self, label, side, kind): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : string / None + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + + """ + + # we are a numeric index, so we accept + # integer/floats directly + if not (is_integer(label) or is_float(label)): + self._invalid_indexer('slice',label) + + return label + class Int64Index(NumericIndex): """ @@ -2677,52 +2723,30 @@ def astype(self, dtype): self.__class__) return Index(self.values, name=self.name, dtype=dtype) - def _maybe_cast_slice_bound(self, label, side): + def _convert_scalar_indexer(self, key, kind=None): + if kind == 'iloc': + return super(Float64Index, self)._convert_scalar_indexer(key, + kind=kind) + return key + + def _convert_slice_indexer(self, key, kind=None): """ - This function should be overloaded in subclasses that allow non-trivial - casting on label-slice bounds, e.g. 
datetime-like indices allowing - strings containing formatted datetimes. + convert a slice indexer, by definition these are labels + unless we are iloc Parameters ---------- - label : object - side : {'left', 'right'} - - Returns - ------- - label : object - - Notes - ----- - Value of `side` parameter should be validated in caller. - + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) """ - if not (is_integer(label) or is_float(label)): - self._invalid_indexer('slice',label) - - return label - - def _convert_scalar_indexer(self, key, typ=None): - if typ == 'iloc': - return super(Float64Index, self)._convert_scalar_indexer(key, - typ=typ) - return key - - def _convert_slice_indexer(self, key, typ=None): - """ convert a slice indexer, by definition these are labels - unless we are iloc """ # if we are not a slice, then we are done if not isinstance(key, slice): return key - if typ == 'iloc': + if kind == 'iloc': return super(Float64Index, self)._convert_slice_indexer(key, - typ=typ) - - # allow floats here - validator = lambda v: v is None or is_integer(v) or is_float(v) - self._validate_slicer(key, validator) + kind=kind) # translate to locations return self.slice_indexer(key.start, key.stop, key.step) @@ -4147,12 +4171,12 @@ def _tuple_index(self): """ return Index(self.values) - def get_slice_bound(self, label, side): + def get_slice_bound(self, label, side, kind): if not isinstance(label, tuple): label = label, return self._partial_tup_index(label, side=side) - def slice_locs(self, start=None, end=None, step=None): + def slice_locs(self, start=None, end=None, step=None, kind=None): """ For an ordered MultiIndex, compute the slice locations for input labels. They can be tuples representing partial levels, e.g. 
for a @@ -4167,6 +4191,7 @@ def slice_locs(self, start=None, end=None, step=None): If None, defaults to the end step : int or None Slice step + kind : string, optional, defaults None Returns ------- @@ -4178,7 +4203,7 @@ def slice_locs(self, start=None, end=None, step=None): """ # This function adds nothing to its parent implementation (the magic # happens in get_slice_bound method), but it adds meaningful doc. - return super(MultiIndex, self).slice_locs(start, end, step) + return super(MultiIndex, self).slice_locs(start, end, step, kind=kind) def _partial_tup_index(self, tup, side='left'): if len(tup) > self.lexsort_depth: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2e953fed2b14f..29fc1d1e4ba78 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -91,8 +91,8 @@ def _get_label(self, label, axis=0): def _get_loc(self, key, axis=0): return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=0, typ=None): - return self.obj._slice(obj, axis=axis, typ=typ) + def _slice(self, obj, axis=0, kind=None): + return self.obj._slice(obj, axis=axis, kind=kind) def _get_setitem_indexer(self, key): if self.axis is not None: @@ -163,12 +163,12 @@ def _convert_scalar_indexer(self, key, axis): # if we are accessing via lowered dim, use the last dim ax = self.obj._get_axis(min(axis, self.ndim - 1)) # a scalar - return ax._convert_scalar_indexer(key, typ=self.name) + return ax._convert_scalar_indexer(key, kind=self.name) def _convert_slice_indexer(self, key, axis): # if we are accessing via lowered dim, use the last dim ax = self.obj._get_axis(min(axis, self.ndim - 1)) - return ax._convert_slice_indexer(key, typ=self.name) + return ax._convert_slice_indexer(key, kind=self.name) def _has_valid_setitem_indexer(self, indexer): return True @@ -960,7 +960,7 @@ def _reindex(keys, level=None): keyarr = _asarray_tuplesafe(key) # handle a mixed integer scenario - indexer = labels._convert_list_indexer_for_mixed(keyarr, typ=self.name) + indexer = 
labels._convert_list_indexer_for_mixed(keyarr, kind=self.name) if indexer is not None: return self.obj.take(indexer, axis=axis) @@ -1107,7 +1107,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): objarr = _asarray_tuplesafe(obj) # If have integer labels, defer to label-based indexing - indexer = labels._convert_list_indexer_for_mixed(objarr, typ=self.name) + indexer = labels._convert_list_indexer_for_mixed(objarr, kind=self.name) if indexer is not None: return indexer @@ -1163,7 +1163,7 @@ def _get_slice_axis(self, slice_obj, axis=0): indexer = self._convert_slice_indexer(slice_obj, axis) if isinstance(indexer, slice): - return self._slice(indexer, axis=axis, typ='iloc') + return self._slice(indexer, axis=axis, kind='iloc') else: return self.obj.take(indexer, axis=axis, convert=False) @@ -1221,7 +1221,7 @@ def _get_slice_axis(self, slice_obj, axis=0): slice_obj.step) if isinstance(indexer, slice): - return self._slice(indexer, axis=axis, typ='iloc') + return self._slice(indexer, axis=axis, kind='iloc') else: return self.obj.take(indexer, axis=axis, convert=False) @@ -1412,7 +1412,7 @@ def _get_slice_axis(self, slice_obj, axis=0): slice_obj = self._convert_slice_indexer(slice_obj, axis) if isinstance(slice_obj, slice): - return self._slice(slice_obj, axis=axis, typ='iloc') + return self._slice(slice_obj, axis=axis, kind='iloc') else: return self.obj.take(slice_obj, axis=axis, convert=False) @@ -1572,7 +1572,7 @@ def convert_to_index_sliceable(obj, key): """ idx = obj.index if isinstance(key, slice): - return idx._convert_slice_indexer(key, typ='getitem') + return idx._convert_slice_indexer(key, kind='getitem') elif isinstance(key, compat.string_types): diff --git a/pandas/core/series.py b/pandas/core/series.py index 77a5d61ad6fa0..036aca72c8230 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -491,7 +491,7 @@ def _ixs(self, i, axis=0): raise except: if isinstance(i, slice): - indexer = self.index._convert_slice_indexer(i, typ='iloc') 
+ indexer = self.index._convert_slice_indexer(i, kind='iloc') return self._get_values(indexer) else: label = self.index[i] @@ -504,8 +504,8 @@ def _ixs(self, i, axis=0): def _is_mixed_type(self): return False - def _slice(self, slobj, axis=0, typ=None): - slobj = self.index._convert_slice_indexer(slobj, typ=typ or 'getitem') + def _slice(self, slobj, axis=0, kind=None): + slobj = self.index._convert_slice_indexer(slobj, kind=kind or 'getitem') return self._get_values(slobj) def __getitem__(self, key): @@ -536,7 +536,7 @@ def __getitem__(self, key): else: # we can try to coerce the indexer (or this will raise) - new_key = self.index._convert_scalar_indexer(key,typ='getitem') + new_key = self.index._convert_scalar_indexer(key,kind='getitem') if type(new_key) != type(key): return self.__getitem__(new_key) raise @@ -555,7 +555,7 @@ def __getitem__(self, key): def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): - indexer = self.index._convert_slice_indexer(key, typ='getitem') + indexer = self.index._convert_slice_indexer(key, kind='getitem') return self._get_values(indexer) elif isinstance(key, ABCDataFrame): raise TypeError('Indexing a Series with DataFrame is not supported, '\ @@ -693,7 +693,7 @@ def _set_with_engine(self, key, value): def _set_with(self, key, value): # other: fancy integer or otherwise if isinstance(key, slice): - indexer = self.index._convert_slice_indexer(key, typ='getitem') + indexer = self.index._convert_slice_indexer(key, kind='getitem') return self._set_values(indexer, value) else: if isinstance(key, tuple): diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 821720f4035a8..30b06c8a93142 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -378,7 +378,7 @@ def set_value(self, index, col, value, takeable=False): return dense.to_sparse(kind=self._default_kind, fill_value=self._default_fill_value) - def _slice(self, slobj, axis=0, typ=None): + def _slice(self, slobj, axis=0, 
kind=None): if axis == 0: new_index = self.index[slobj] new_columns = self.columns diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index ee9edbe36ae28..d3f3f59f264c5 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -68,10 +68,10 @@ class SparsePanel(Panel): def __init__(self, frames=None, items=None, major_axis=None, minor_axis=None, default_fill_value=np.nan, default_kind='block', copy=False): - + if frames is None: frames = {} - + if isinstance(frames, np.ndarray): new_frames = {} for item, vals in zip(items, frames): @@ -191,7 +191,7 @@ def _ixs(self, i, axis=0): return self.xs(key, axis=axis) - def _slice(self, slobj, axis=0, typ=None): + def _slice(self, slobj, axis=0, kind=None): """ for compat as we don't support Block Manager here """ diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index b8650683a004a..bdf2b43d7e945 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1001,8 +1001,8 @@ def test_loc_getitem_label_slice(self): self.check_result('lab slice', 'loc', slice('W','Z'), 'ix', slice('W','Z'), typs = ['labels'], axes=2) self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=0) - self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=1, fails=KeyError) - self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=2, fails=KeyError) + self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=1, fails=TypeError) + self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=2, fails=TypeError) self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=0, fails=TypeError) self.check_result('mixed slice', 'loc', 
slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=1, fails=KeyError) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 9664cef52696d..048a9ff4b93a6 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -298,12 +298,20 @@ def resolution(self): from pandas.tseries.frequencies import get_reso_string return get_reso_string(self._resolution) - def _convert_scalar_indexer(self, key, typ=None): - """ we don't allow integer or float indexing on datetime-like when using loc """ - if typ in ['loc'] and lib.isscalar(key) and (is_integer(key) or is_float(key)): + def _convert_scalar_indexer(self, key, kind=None): + """ + we don't allow integer or float indexing on datetime-like when using loc + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + """ + + if kind in ['loc'] and lib.isscalar(key) and (is_integer(key) or is_float(key)): self._invalid_indexer('index',key) - return super(DatetimeIndexOpsMixin, self)._convert_scalar_indexer(key, typ=typ) + return super(DatetimeIndexOpsMixin, self)._convert_scalar_indexer(key, kind=kind) def _add_datelike(self, other): raise NotImplementedError diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index f733a7183c9bc..24d12078fd7f0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1262,7 +1262,7 @@ def get_loc(self, key, method=None): except (KeyError, ValueError): raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string, cast it to datetime according to resolution. 
@@ -1270,6 +1270,7 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None Returns ------- @@ -1300,7 +1301,7 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True): use_rhs=use_rhs) return loc - def slice_indexer(self, start=None, end=None, step=None): + def slice_indexer(self, start=None, end=None, step=None, kind=None): """ Return indexer for specified label slice. Index.slice_indexer, customized to handle time slicing. @@ -1335,11 +1336,11 @@ def slice_indexer(self, start=None, end=None, step=None): (end is None or isinstance(end, compat.string_types))): mask = True if start is not None: - start_casted = self._maybe_cast_slice_bound(start, 'left') + start_casted = self._maybe_cast_slice_bound(start, 'left', kind) mask = start_casted <= self if end is not None: - end_casted = self._maybe_cast_slice_bound(end, 'right') + end_casted = self._maybe_cast_slice_bound(end, 'right', kind) mask = (self <= end_casted) & mask indexer = mask.nonzero()[0][::step] diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 95bbf3825e0da..1a2381441ab8d 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -549,7 +549,7 @@ def get_loc(self, key, method=None): except KeyError: raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string or a datetime, cast it to Period.ordinal according to resolution. 
@@ -558,6 +558,7 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None Returns ------- diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 4e86bf9d92f48..e01ff54feab57 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -675,7 +675,7 @@ def get_loc(self, key, method=None): except (KeyError, ValueError): raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string, cast it to timedelta according to resolution. @@ -684,6 +684,7 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None Returns -------