diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 5ab72f633f49b..5079b4fa8ad6f 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -85,7 +85,7 @@ of multi-axis indexing. - ``.iloc`` is primarily integer position based (from ``0`` to ``length-1`` of the axis), but may also be used with a boolean - array. ``.iloc`` will raise ``IndexError`` if a requested + array. ``.iloc`` will raise ``IndexError`` if a requested indexer is out-of-bounds, except *slice* indexers which allow out-of-bounds indexing. (this conforms with python/numpy *slice* semantics). Allowed inputs are: @@ -292,6 +292,27 @@ Selection By Label This is sometimes called ``chained assignment`` and should be avoided. See :ref:`Returning a View versus Copy ` +.. warning:: + + ``.loc`` is strict when you present slicers that are not compatible with (or convertible to) the index type. For example, + slicing a ``DatetimeIndex`` with integers will raise a ``TypeError``. + + .. ipython:: python + + dfl = DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=date_range('20130101',periods=5)) + dfl + + .. code-block:: python + + In [4]: dfl.loc[2:3] + TypeError: cannot do slice indexing on <class 'pandas.tseries.index.DatetimeIndex'> with these indexers [2] of <type 'int'> + + String-like slice bounds *can* be converted to the type of the index and lead to natural slicing. + + .. ipython:: python + + dfl.loc['20130102':'20130104'] + pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. **at least 1** of the labels for which you ask, must be in the index or a ``KeyError`` will be raised! When slicing, the start bound is *included*, **AND** the stop bound is *included*. Integers are valid labels, but they refer to the label **and not the position**. @@ -1486,5 +1507,3 @@ This will **not** work at all, and so should be avoided The chained assignment warnings / exceptions are aiming to inform the user of a possibly invalid assignment. 
There may be false positives; situations where a chained assignment is inadvertantly reported. - - diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 1550527706a9e..9cb474a53f25a 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -20,6 +20,8 @@ users upgrade to this version. New features ~~~~~~~~~~~~ +.. _whatsnew_0160.enhancements: + - Reindex now supports ``method='nearest'`` for frames or series with a monotonic increasing or decreasing index (:issue:`9258`): .. ipython:: python @@ -29,7 +31,41 @@ New features This method is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods. -- DataFrame assign method +- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`) +- Added time interval selection in ``get_data_yahoo`` (:issue:`9071`) +- Added ``Series.str.slice_replace()``, which previously raised ``NotImplementedError`` (:issue:`8888`) +- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`) +- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`) +- A lag parameter was added to the autocorrelation method of ``Series``; it defaults to lag-1 autocorrelation (:issue:`9192`) +- ``Timedelta`` will now accept ``nanoseconds`` keyword in constructor (:issue:`9273`) +- SQL code now safely escapes table and column names (:issue:`8986`) + +- Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` (:issue:`9322`) +- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``, + ``isupper()``, ``istitle()`` which behave the same as standard ``str`` (:issue:`9282`) + +- Added ``StringMethods.find()`` and ``rfind()`` which behave the same as standard ``str`` (:issue:`9386`) + +- ``Index.get_indexer`` now supports ``method='pad'`` and ``method='backfill'`` for any target array, not just monotonic 
targets. These methods also work for monotonic decreasing as well as monotonic increasing indexes (:issue:`9258`). +- ``Index.asof`` now works on all index types (:issue:`9258`). + +- Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave the same as standard ``str`` (:issue:`9439`) +- The ``read_excel()`` function's :ref:`sheetname <io.specifying_sheets>` argument now accepts a list and ``None``, to get multiple or all sheets respectively. If more than one sheet is specified, a dictionary is returned. (:issue:`9450`) + + .. code-block:: python + + # Returns the 1st and 4th sheet, as a dictionary of DataFrames. + pd.read_excel('path_to_file.xls',sheetname=['Sheet1',3]) + +- A ``verbose`` argument has been added to ``io.read_excel()``; it defaults to False. Set it to True to print sheet names as they are parsed. (:issue:`9450`) +- Added ``StringMethods.ljust()`` and ``rjust()`` which behave the same as standard ``str`` (:issue:`9352`) +- ``StringMethods.pad()`` and ``center()`` now accept a ``fillchar`` option to specify the filling character (:issue:`9352`) +- Added ``StringMethods.zfill()`` which behaves the same as standard ``str`` (:issue:`9387`) + +DataFrame Assign +~~~~~~~~~~~~~~~~ + +.. _whatsnew_0160.enhancements.assign: Inspired by `dplyr's `__ ``mutate`` verb, DataFrame has a new @@ -71,6 +107,55 @@ calculate the ratio, and plot See the :ref:`documentation ` for more. (:issue:`9229`) + +Interaction with scipy.sparse +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0160.enhancements.sparse: + +Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a ``SparseSeries`` with a ``MultiIndex``, we can convert it to a ``scipy.sparse.coo_matrix`` by specifying the row and column labels as index levels: + +.. 
ipython:: python + + from numpy import nan + s = Series([3.0, nan, 1.0, 3.0, nan, nan]) + s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + + s + + # SparseSeries + ss = s.to_sparse() + ss + + A, rows, columns = ss.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=False) + + A + A.todense() + rows + columns + +The from_coo method is a convenience method for creating a ``SparseSeries`` +from a ``scipy.sparse.coo_matrix``: + +.. ipython:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + A + A.todense() + + ss = SparseSeries.from_coo(A) + ss + .. _whatsnew_0160.api: .. _whatsnew_0160.api_breaking: @@ -211,96 +296,80 @@ Backwards incompatible API changes p // 0 +Indexing Changes +~~~~~~~~~~~~~~~~ -Deprecations -~~~~~~~~~~~~ +.. _whatsnew_0160.api_breaking.indexing: -.. _whatsnew_0160.deprecations: +The behavior of a small sub-set of edge cases for using ``.loc`` have changed (:issue:`8613`). Furthermore we have improved the content of the error messages that are raised: +- slicing with ``.loc`` where the start and/or stop bound is not found in the index is now allowed; this previously would raise a ``KeyError``. This makes the behavior the same as ``.ix`` in this case. This change is only for slicing, not when indexing with a single label. -Enhancements -~~~~~~~~~~~~ + .. ipython:: python -.. 
_whatsnew_0160.enhancements: + df = DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=date_range('20130101',periods=5)) + df + s = Series(range(5),[-2,-1,1,2,3]) + s -- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`) -- Added time interval selection in ``get_data_yahoo`` (:issue:`9071`) -- Added ``Series.str.slice_replace()``, which previously raised ``NotImplementedError`` (:issue:`8888`) -- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`) -- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`) -- Lag parameter was added to the autocorrelation method of ``Series``, defaults to lag-1 autocorrelation (:issue:`9192`) -- ``Timedelta`` will now accept ``nanoseconds`` keyword in constructor (:issue:`9273`) -- SQL code now safely escapes table and column names (:issue:`8986`) + Previous Behavior -- Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` (:issue:`9322`) -- Added ``StringMethods.isalnum()``, ``isalpha()``, ``isdigit()``, ``isspace()``, ``islower()``, - ``isupper()``, ``istitle()`` which behave as the same as standard ``str`` (:issue:`9282`) + .. code-block:: python -- Added ``StringMethods.find()`` and ``rfind()`` which behave as the same as standard ``str`` (:issue:`9386`) + In [4]: df.loc['2013-01-02':'2013-01-10'] + KeyError: 'stop bound [2013-01-10] is not in the [index]' -- ``Index.get_indexer`` now supports ``method='pad'`` and ``method='backfill'`` even for any target array, not just monotonic targets. These methods also work for monotonic decreasing as well as monotonic increasing indexes (:issue:`9258`). -- ``Index.asof`` now works on all index types (:issue:`9258`). 
+ In [6]: s.loc[-10:3] + KeyError: 'start bound [-10] is not the [index]' -- Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave as the same as standard ``str`` (:issue:`9439`) -- The ``read_excel()`` function's :ref:`sheetname <_io.specifying_sheets>` argument now accepts a list and ``None``, to get multiple or all sheets respectively. If more than one sheet is specified, a dictionary is returned. (:issue:`9450`) + New Behavior + + .. ipython:: python + + df.loc['2013-01-02':'2013-01-10'] + s.loc[-10:3] + +- allow slicing with float-like values on an integer index for ``.ix``. Previously this was only enabled for ``.loc``: .. code-block:: python - # Returns the 1st and 4th sheet, as a dictionary of DataFrames. - pd.read_excel('path_to_file.xls',sheetname=['Sheet1',3]) + Previous Behavior -- A ``verbose`` argument has been augmented in ``io.read_excel()``, defaults to False. Set to True to print sheet names as they are parsed. (:issue:`9450`) -- Added ``StringMethods.ljust()`` and ``rjust()`` which behave as the same as standard ``str`` (:issue:`9352`) -- ``StringMethods.pad()`` and ``center()`` now accept ``fillchar`` option to specify filling character (:issue:`9352`) -- Added ``StringMethods.zfill()`` which behave as the same as standard ``str`` (:issue:`9387`) + In [8]: s.ix[-1.0:2] + TypeError: the slice start value [-1.0] is not a proper indexer for this index type (Int64Index) -Interaction with scipy.sparse -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + New Behavior -.. _whatsnew_0160.enhancements.sparse: + .. ipython:: python -Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels: + In [8]: s.ix[-1.0:2] + Out[2]: + -1 1 + 1 2 + 2 3 + dtype: int64 -.. 
ipython:: python +- provide a useful exception for indexing with an invalid type for that index when using ``.loc``. For example, trying to use ``.loc`` on an index of type ``DatetimeIndex``, ``PeriodIndex`` or ``TimedeltaIndex`` with an integer (or a float) now raises a ``TypeError`` instead of a ``KeyError``. - from numpy import nan - s = Series([3.0, nan, 1.0, 3.0, nan, nan]) - s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), - (1, 2, 'a', 1), - (1, 1, 'b', 0), - (1, 1, 'b', 1), - (2, 1, 'b', 0), - (2, 1, 'b', 1)], - names=['A', 'B', 'C', 'D']) + Previous Behavior - s + .. code-block:: python - # SparseSeries - ss = s.to_sparse() - ss + In [4]: df.loc[2:3] + KeyError: 'start bound [2] is not the [index]' - A, rows, columns = ss.to_coo(row_levels=['A', 'B'], - column_levels=['C', 'D'], - sort_labels=False) + New Behavior - A - A.todense() - rows - columns + .. code-block:: python -The from_coo method is a convenience method for creating a ``SparseSeries`` -from a ``scipy.sparse.coo_matrix``: + In [4]: df.loc[2:3] + TypeError: cannot do slice indexing on <class 'pandas.tseries.index.DatetimeIndex'> with these indexers [2] of <type 'int'> -.. ipython:: python +Deprecations +~~~~~~~~~~~~ - from scipy import sparse - A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), - shape=(3, 4)) - A - A.todense() +.. _whatsnew_0160.deprecations: - ss = SparseSeries.from_coo(A) - ss Performance ~~~~~~~~~~~ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 18500fd05b5f8..9e4e79f3d70cb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1159,11 +1159,11 @@ def _clear_item_cache(self, i=None): else: self._item_cache.clear() - def _slice(self, slobj, axis=0, typ=None): + def _slice(self, slobj, axis=0, kind=None): """ Construct a slice of this container. - typ parameter is maintained for compatibility with Series slicing. + kind parameter is maintained for compatibility with Series slicing. 
""" axis = self._get_block_manager_axis(axis) diff --git a/pandas/core/index.py b/pandas/core/index.py index 0cad537855857..10dcdc5a7185a 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -632,18 +632,26 @@ def is_mixed(self): def holds_integer(self): return self.inferred_type in ['integer', 'mixed-integer'] - def _convert_scalar_indexer(self, key, typ=None): - """ convert a scalar indexer, right now we are converting + def _convert_scalar_indexer(self, key, kind=None): + """ + convert a scalar indexer + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + + right now we are converting floats -> ints if the index supports it """ def to_int(): ikey = int(key) if ikey != key: - return self._convert_indexer_error(key, 'label') + return self._invalid_indexer('label', key) return ikey - if typ == 'iloc': + if kind == 'iloc': if is_integer(key): return key elif is_float(key): @@ -651,7 +659,7 @@ def to_int(): warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format( type(self).__name__),FutureWarning) return key - return self._convert_indexer_error(key, 'label') + return self._invalid_indexer('label', key) if is_float(key): if not self.is_floating(): @@ -661,14 +669,6 @@ def to_int(): return key - def _validate_slicer(self, key, f): - """ validate and raise if needed on a slice indexers according to the - passed in function """ - - for c in ['start','stop','step']: - if not f(getattr(key,c)): - self._convert_indexer_error(key.start, 'slice {0} value'.format(c)) - def _convert_slice_indexer_getitem(self, key, is_index_slice=False): """ called from the getitem slicers, determine how to treat the key whether positional or not """ @@ -676,15 +676,22 @@ def _convert_slice_indexer_getitem(self, key, is_index_slice=False): return key return self._convert_slice_indexer(key) - def _convert_slice_indexer(self, key, typ=None): - """ convert a slice 
indexer. disallow floats in the start/stop/step """ + def _convert_slice_indexer(self, key, kind=None): + """ + convert a slice indexer. disallow floats in the start/stop/step + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + """ # if we are not a slice, then we are done if not isinstance(key, slice): return key # validate iloc - if typ == 'iloc': + if kind == 'iloc': # need to coerce to_int if needed def f(c): @@ -698,7 +705,7 @@ def f(c): "and not floating point",FutureWarning) return int(v) - self._convert_indexer_error(v, 'slice {0} value'.format(c)) + self._invalid_indexer('slice {0} value'.format(c), v) return slice(*[ f(c) for c in ['start','stop','step']]) @@ -707,12 +714,18 @@ def validate(v): if v is None or is_integer(v): return True - # dissallow floats + # dissallow floats (except for .ix) elif is_float(v): + if kind == 'ix': + return True + return False return True - self._validate_slicer(key, validate) + for c in ['start','stop','step']: + v = getattr(key,c) + if not validate(v): + self._invalid_indexer('slice {0} value'.format(c), v) # figure out if this is a positional indexer start, stop, step = key.start, key.stop, key.step @@ -724,7 +737,7 @@ def is_int(v): is_index_slice = is_int(start) and is_int(stop) is_positional = is_index_slice and not self.is_integer() - if typ == 'getitem': + if kind == 'getitem': return self._convert_slice_indexer_getitem( key, is_index_slice=is_index_slice) @@ -760,16 +773,16 @@ def is_int(v): return indexer - def _convert_list_indexer(self, key, typ=None): + def _convert_list_indexer(self, key, kind=None): """ convert a list indexer. these should be locations """ return key - def _convert_list_indexer_for_mixed(self, keyarr, typ=None): + def _convert_list_indexer_for_mixed(self, keyarr, kind=None): """ passed a key that is tuplesafe that is integer based and we have a mixed index (e.g. number/labels). figure out the indexer. 
return None if we can't help """ - if (typ is None or typ in ['iloc','ix']) and (is_integer_dtype(keyarr) and not self.is_floating()): + if (kind is None or kind in ['iloc','ix']) and (is_integer_dtype(keyarr) and not self.is_floating()): if self.inferred_type != 'integer': keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr) @@ -787,11 +800,13 @@ def _convert_list_indexer_for_mixed(self, keyarr, typ=None): return None - def _convert_indexer_error(self, key, msg=None): - if msg is None: - msg = 'label' - raise TypeError("the {0} [{1}] is not a proper indexer for this index " - "type ({2})".format(msg, key, self.__class__.__name__)) + def _invalid_indexer(self, form, key): + """ consistent invalid indexer message """ + raise TypeError("cannot do {form} indexing on {klass} with these " + "indexers [{key}] of {kind}".format(form=form, + klass=type(self), + key=key, + kind=type(key))) def get_duplicates(self): from collections import defaultdict @@ -839,8 +854,8 @@ def inferred_type(self): """ return a string of the type inferred from the values """ return lib.infer_dtype(self) - def is_type_compatible(self, typ): - return typ == self.inferred_type + def is_type_compatible(self, kind): + return kind == self.inferred_type @cache_readonly def is_all_dates(self): @@ -2077,7 +2092,7 @@ def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None return Index(joined, name=name) - def slice_indexer(self, start=None, end=None, step=None): + def slice_indexer(self, start=None, end=None, step=None, kind=None): """ For an ordered Index, compute the slice indexer for input labels and step @@ -2089,6 +2104,7 @@ def slice_indexer(self, start=None, end=None, step=None): end : label, default None If None, defaults to the end step : int, default None + kind : string, default None Returns ------- @@ -2098,7 +2114,7 @@ def slice_indexer(self, start=None, end=None, step=None): ----- This function assumes that the data is sorted, so use at your 
own peril """ - start_slice, end_slice = self.slice_locs(start, end, step=step) + start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) # return a slice if not lib.isscalar(start_slice): @@ -2108,7 +2124,7 @@ def slice_indexer(self, start=None, end=None, step=None): return slice(start_slice, end_slice, step) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ This function should be overloaded in subclasses that allow non-trivial casting on label-slice bounds, e.g. datetime-like indices allowing @@ -2118,12 +2134,30 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None + + Returns + ------- + label : object Notes ----- Value of `side` parameter should be validated in caller. """ + + # We are a plain index here (sub-class override this method if they + # wish to have special treatment for floats/ints, e.g. Float64Index and + # datetimelike Indexes + # reject them + if is_float(label): + self._invalid_indexer('slice',label) + + # we are trying to find integer bounds on a non-integer based index + # this is rejected (generally .loc gets you here) + elif is_integer(label): + self._invalid_indexer('slice',label) + return label def _searchsorted_monotonic(self, label, side='left'): @@ -2139,7 +2173,7 @@ def _searchsorted_monotonic(self, label, side='left'): raise ValueError('index must be monotonic increasing or decreasing') - def get_slice_bound(self, label, side): + def get_slice_bound(self, label, side, kind): """ Calculate slice bound that corresponds to given label. 
@@ -2150,6 +2184,7 @@ def get_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None, the type of indexer """ if side not in ('left', 'right'): @@ -2158,10 +2193,12 @@ def get_slice_bound(self, label, side): " must be either 'left' or 'right': %s" % (side,)) original_label = label + # For datetime indices label may be a string that has to be converted # to datetime boundary according to its resolution. - label = self._maybe_cast_slice_bound(label, side) + label = self._maybe_cast_slice_bound(label, side, kind) + # we need to look up the label try: slc = self.get_loc(label) except KeyError as err: @@ -2194,7 +2231,7 @@ def get_slice_bound(self, label, side): else: return slc - def slice_locs(self, start=None, end=None, step=None): + def slice_locs(self, start=None, end=None, step=None, kind=None): """ Compute slice locations for input labels. @@ -2204,6 +2241,9 @@ def slice_locs(self, start=None, end=None, step=None): If None, defaults to the beginning end : label, default None If None, defaults to the end + step : int, defaults None + If None, defaults to 1 + kind : string, defaults None Returns ------- @@ -2218,13 +2258,13 @@ def slice_locs(self, start=None, end=None, step=None): start_slice = None if start is not None: - start_slice = self.get_slice_bound(start, 'left') + start_slice = self.get_slice_bound(start, 'left', kind) if start_slice is None: start_slice = 0 end_slice = None if end is not None: - end_slice = self.get_slice_bound(end, 'right') + end_slice = self.get_slice_bound(end, 'right', kind) if end_slice is None: end_slice = len(self) @@ -2481,6 +2521,35 @@ class NumericIndex(Index): """ _is_numeric_dtype = True + def _maybe_cast_slice_bound(self, label, side, kind): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. 
+ + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : string / None + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + + """ + + # we are a numeric index, so we accept + # integer/floats directly + if not (is_integer(label) or is_float(label)): + self._invalid_indexer('slice',label) + + return label + class Int64Index(NumericIndex): """ @@ -2654,27 +2723,30 @@ def astype(self, dtype): self.__class__) return Index(self.values, name=self.name, dtype=dtype) - def _convert_scalar_indexer(self, key, typ=None): - if typ == 'iloc': + def _convert_scalar_indexer(self, key, kind=None): + if kind == 'iloc': return super(Float64Index, self)._convert_scalar_indexer(key, - typ=typ) + kind=kind) return key - def _convert_slice_indexer(self, key, typ=None): - """ convert a slice indexer, by definition these are labels - unless we are iloc """ + def _convert_slice_indexer(self, key, kind=None): + """ + convert a slice indexer, by definition these are labels + unless we are iloc + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + """ # if we are not a slice, then we are done if not isinstance(key, slice): return key - if typ == 'iloc': + if kind == 'iloc': return super(Float64Index, self)._convert_slice_indexer(key, - typ=typ) - - # allow floats here - validator = lambda v: v is None or is_integer(v) or is_float(v) - self._validate_slicer(key, validator) + kind=kind) # translate to locations return self.slice_indexer(key.start, key.stop, key.step) @@ -4099,12 +4171,12 @@ def _tuple_index(self): """ return Index(self.values) - def get_slice_bound(self, label, side): + def get_slice_bound(self, label, side, kind): if not isinstance(label, tuple): label = label, return self._partial_tup_index(label, side=side) - def slice_locs(self, start=None, end=None, step=None): + def slice_locs(self, start=None, end=None, 
step=None, kind=None): """ For an ordered MultiIndex, compute the slice locations for input labels. They can be tuples representing partial levels, e.g. for a @@ -4119,6 +4191,7 @@ def slice_locs(self, start=None, end=None, step=None): If None, defaults to the end step : int or None Slice step + kind : string, optional, defaults None Returns ------- @@ -4130,7 +4203,7 @@ def slice_locs(self, start=None, end=None, step=None): """ # This function adds nothing to its parent implementation (the magic # happens in get_slice_bound method), but it adds meaningful doc. - return super(MultiIndex, self).slice_locs(start, end, step) + return super(MultiIndex, self).slice_locs(start, end, step, kind=kind) def _partial_tup_index(self, tup, side='left'): if len(tup) > self.lexsort_depth: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1c951f58a17d8..29fc1d1e4ba78 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -91,8 +91,8 @@ def _get_label(self, label, axis=0): def _get_loc(self, key, axis=0): return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=0, typ=None): - return self.obj._slice(obj, axis=axis, typ=typ) + def _slice(self, obj, axis=0, kind=None): + return self.obj._slice(obj, axis=axis, kind=kind) def _get_setitem_indexer(self, key): if self.axis is not None: @@ -163,12 +163,12 @@ def _convert_scalar_indexer(self, key, axis): # if we are accessing via lowered dim, use the last dim ax = self.obj._get_axis(min(axis, self.ndim - 1)) # a scalar - return ax._convert_scalar_indexer(key, typ=self.name) + return ax._convert_scalar_indexer(key, kind=self.name) def _convert_slice_indexer(self, key, axis): # if we are accessing via lowered dim, use the last dim ax = self.obj._get_axis(min(axis, self.ndim - 1)) - return ax._convert_slice_indexer(key, typ=self.name) + return ax._convert_slice_indexer(key, kind=self.name) def _has_valid_setitem_indexer(self, indexer): return True @@ -960,7 +960,7 @@ def _reindex(keys, level=None): 
keyarr = _asarray_tuplesafe(key) # handle a mixed integer scenario - indexer = labels._convert_list_indexer_for_mixed(keyarr, typ=self.name) + indexer = labels._convert_list_indexer_for_mixed(keyarr, kind=self.name) if indexer is not None: return self.obj.take(indexer, axis=axis) @@ -1107,7 +1107,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): objarr = _asarray_tuplesafe(obj) # If have integer labels, defer to label-based indexing - indexer = labels._convert_list_indexer_for_mixed(objarr, typ=self.name) + indexer = labels._convert_list_indexer_for_mixed(objarr, kind=self.name) if indexer is not None: return indexer @@ -1163,7 +1163,7 @@ def _get_slice_axis(self, slice_obj, axis=0): indexer = self._convert_slice_indexer(slice_obj, axis) if isinstance(indexer, slice): - return self._slice(indexer, axis=axis, typ='iloc') + return self._slice(indexer, axis=axis, kind='iloc') else: return self.obj.take(indexer, axis=axis, convert=False) @@ -1221,7 +1221,7 @@ def _get_slice_axis(self, slice_obj, axis=0): slice_obj.step) if isinstance(indexer, slice): - return self._slice(indexer, axis=axis, typ='iloc') + return self._slice(indexer, axis=axis, kind='iloc') else: return self.obj.take(indexer, axis=axis, convert=False) @@ -1243,25 +1243,7 @@ def _has_valid_type(self, key, axis): # boolean if isinstance(key, slice): - - if ax.is_floating(): - - # allowing keys to be slicers with no fallback - pass - - else: - if key.start is not None: - if key.start not in ax: - raise KeyError( - "start bound [%s] is not the [%s]" % - (key.start, self.obj._get_axis_name(axis)) - ) - if key.stop is not None: - if key.stop not in ax: - raise KeyError( - "stop bound [%s] is not in the [%s]" % - (key.stop, self.obj._get_axis_name(axis)) - ) + return True elif is_bool_indexer(key): return True @@ -1430,7 +1412,7 @@ def _get_slice_axis(self, slice_obj, axis=0): slice_obj = self._convert_slice_indexer(slice_obj, axis) if isinstance(slice_obj, slice): - return 
self._slice(slice_obj, axis=axis, typ='iloc') + return self._slice(slice_obj, axis=axis, kind='iloc') else: return self.obj.take(slice_obj, axis=axis, convert=False) @@ -1590,7 +1572,7 @@ def convert_to_index_sliceable(obj, key): """ idx = obj.index if isinstance(key, slice): - return idx._convert_slice_indexer(key, typ='getitem') + return idx._convert_slice_indexer(key, kind='getitem') elif isinstance(key, compat.string_types): diff --git a/pandas/core/series.py b/pandas/core/series.py index 901faef484377..036aca72c8230 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -491,7 +491,7 @@ def _ixs(self, i, axis=0): raise except: if isinstance(i, slice): - indexer = self.index._convert_slice_indexer(i, typ='iloc') + indexer = self.index._convert_slice_indexer(i, kind='iloc') return self._get_values(indexer) else: label = self.index[i] @@ -504,8 +504,8 @@ def _ixs(self, i, axis=0): def _is_mixed_type(self): return False - def _slice(self, slobj, axis=0, typ=None): - slobj = self.index._convert_slice_indexer(slobj, typ=typ or 'getitem') + def _slice(self, slobj, axis=0, kind=None): + slobj = self.index._convert_slice_indexer(slobj, kind=kind or 'getitem') return self._get_values(slobj) def __getitem__(self, key): @@ -536,7 +536,7 @@ def __getitem__(self, key): else: # we can try to coerce the indexer (or this will raise) - new_key = self.index._convert_scalar_indexer(key) + new_key = self.index._convert_scalar_indexer(key,kind='getitem') if type(new_key) != type(key): return self.__getitem__(new_key) raise @@ -555,7 +555,7 @@ def __getitem__(self, key): def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): - indexer = self.index._convert_slice_indexer(key, typ='getitem') + indexer = self.index._convert_slice_indexer(key, kind='getitem') return self._get_values(indexer) elif isinstance(key, ABCDataFrame): raise TypeError('Indexing a Series with DataFrame is not supported, '\ @@ -693,7 +693,7 @@ def _set_with_engine(self, 
key, value): def _set_with(self, key, value): # other: fancy integer or otherwise if isinstance(key, slice): - indexer = self.index._convert_slice_indexer(key, typ='getitem') + indexer = self.index._convert_slice_indexer(key, kind='getitem') return self._set_values(indexer, value) else: if isinstance(key, tuple): diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 821720f4035a8..30b06c8a93142 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -378,7 +378,7 @@ def set_value(self, index, col, value, takeable=False): return dense.to_sparse(kind=self._default_kind, fill_value=self._default_fill_value) - def _slice(self, slobj, axis=0, typ=None): + def _slice(self, slobj, axis=0, kind=None): if axis == 0: new_index = self.index[slobj] new_columns = self.columns diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index ee9edbe36ae28..d3f3f59f264c5 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -68,10 +68,10 @@ class SparsePanel(Panel): def __init__(self, frames=None, items=None, major_axis=None, minor_axis=None, default_fill_value=np.nan, default_kind='block', copy=False): - + if frames is None: frames = {} - + if isinstance(frames, np.ndarray): new_frames = {} for item, vals in zip(items, frames): @@ -191,7 +191,7 @@ def _ixs(self, i, axis=0): return self.xs(key, axis=axis) - def _slice(self, slobj, axis=0, typ=None): + def _slice(self, slobj, axis=0, kind=None): """ for compat as we don't support Block Manager here """ diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 75c28681ecde5..ef05209ebe54c 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -950,16 +950,30 @@ def test_slice_locs(self): self.assertEqual(idx.slice_locs(start=3), (3, n)) self.assertEqual(idx.slice_locs(3, 8), (3, 6)) self.assertEqual(idx.slice_locs(5, 10), (3, n)) - self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) - self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) 
self.assertEqual(idx.slice_locs(end=8), (0, 6)) self.assertEqual(idx.slice_locs(end=9), (0, 7)) + # reversed idx2 = idx[::-1] self.assertEqual(idx2.slice_locs(8, 2), (2, 6)) - self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) self.assertEqual(idx2.slice_locs(7, 3), (2, 5)) - self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + + # float slicing + idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=float)) + n = len(idx) + self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) + self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) + self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + + # int slicing with floats + idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=int)) + self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) + self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) + self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) def test_slice_locs_dup(self): idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 68c504b2a35c3..bdf2b43d7e945 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -20,7 +20,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_attr_equal) -from pandas import concat +from pandas import concat, lib from pandas.io.common import PerformanceWarning import pandas.util.testing as tm @@ -211,8 +211,6 @@ def _print(result, error = None): except AssertionError: raise - except TypeError: - raise AssertionError(_print('type error')) except Exception as detail: # if we are in fails, the ok, otherwise raise it @@ -608,7 +606,7 @@ def test_iloc_setitem(self): expected = Series([0,1,0],index=[4,5,6]) assert_series_equal(s, expected) - def test_ix_loc_setitem(self): + def test_ix_loc_setitem_consistency(self): # GH 5771 # loc with slice 
and series @@ -656,6 +654,80 @@ def test_ix_loc_setitem(self): df2.ix[:,2] = pd.to_datetime(df['timestamp'], unit='s') assert_frame_equal(df2,expected) + def test_ix_loc_consistency(self): + + # GH 8613 + # some edge cases where ix/loc should return the same + # this is not an exhaustive case + + def compare(result, expected): + if lib.isscalar(expected): + self.assertEqual(result, expected) + else: + self.assertTrue(expected.equals(result)) + + # failure cases for .loc, but these work for .ix + df = pd.DataFrame(np.random.randn(5,4), columns=list('ABCD')) + for key in [ slice(1,3), tuple([slice(0,2),slice(0,2)]), tuple([slice(0,2),df.columns[0:2]]) ]: + + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, tm.makeTimedeltaIndex ]: + df.index = index(len(df.index)) + df.ix[key] + + self.assertRaises(TypeError, lambda : df.loc[key]) + + df = pd.DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=pd.date_range('2012-01-01', periods=5)) + + for key in [ '2012-01-03', + '2012-01-31', + slice('2012-01-03','2012-01-03'), + slice('2012-01-03','2012-01-04'), + slice('2012-01-03','2012-01-06',2), + slice('2012-01-03','2012-01-31'), + tuple([[True,True,True,False,True]]), + ]: + + # getitem + + # if the expected raises, then compare the exceptions + try: + expected = df.ix[key] + except KeyError: + self.assertRaises(KeyError, lambda : df.loc[key]) + continue + + result = df.loc[key] + compare(result, expected) + + # setitem + df1 = df.copy() + df2 = df.copy() + + df1.ix[key] = 10 + df2.loc[key] = 10 + compare(df2, df1) + + # edge cases + s = Series([1,2,3,4], index=list('abde')) + + result1 = s['a':'c'] + result2 = s.ix['a':'c'] + result3 = s.loc['a':'c'] + assert_series_equal(result1,result2) + assert_series_equal(result1,result3) + + # now work rather than raising KeyError + s = Series(range(5),[-2,-1,1,2,3]) + + result1 = s.ix[-10:3] + result2 = s.loc[-10:3] + assert_series_equal(result1,result2) + + result1 = s.ix[0:3] 
+ result2 = s.loc[0:3] + assert_series_equal(result1,result2) + def test_loc_setitem_multiindex(self): # GH7190 @@ -776,7 +848,11 @@ def test_loc_getitem_label(self): def test_loc_getitem_label_out_of_range(self): # out of range label - self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['ints','labels','mixed','ts','floats'], fails=KeyError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['ints','labels','mixed','ts'], fails=KeyError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['floats'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['ints','labels','mixed'], fails=KeyError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['ts'], axes=0, fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['floats'], axes=0, fails=TypeError) def test_loc_getitem_label_list(self): @@ -814,9 +890,6 @@ def test_loc_getitem_bool(self): def test_loc_getitem_int_slice(self): - # int slices in int - self.check_result('int slice1', 'loc', slice(2,4), 'ix', { 0 : [2,4], 1: [3,6], 2: [4,8] }, typs = ['ints'], fails=KeyError) - # ok self.check_result('int slice2', 'loc', slice(2,4), 'ix', [2,4], typs = ['ints'], axes = 0) self.check_result('int slice2', 'loc', slice(3,6), 'ix', [3,6], typs = ['ints'], axes = 1) @@ -920,7 +993,7 @@ def f(): def test_loc_getitem_label_slice(self): # label slices (with ints) - self.check_result('lab slice', 'loc', slice(1,3), 'ix', slice(1,3), typs = ['labels','mixed','ts','floats','empty'], fails=KeyError) + self.check_result('lab slice', 'loc', slice(1,3), 'ix', slice(1,3), typs = ['labels','mixed','empty','ts','floats'], fails=TypeError) # real label slices self.check_result('lab slice', 'loc', slice('a','c'), 'ix', slice('a','c'), typs = ['labels'], axes=0) @@ -928,23 +1001,18 @@ def test_loc_getitem_label_slice(self): self.check_result('lab slice', 'loc', slice('W','Z'), 'ix', slice('W','Z'), typs = ['labels'], axes=2) 
self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=0) - self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=1, fails=KeyError) - self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=2, fails=KeyError) + self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=1, fails=TypeError) + self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=2, fails=TypeError) - self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=0, fails=KeyError) + self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=0, fails=TypeError) self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=1, fails=KeyError) self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=2, fails=KeyError) - self.check_result('mixed slice', 'loc', slice(2,4,2), 'ix', slice(2,4,2), typs = ['mixed'], axes=0) + self.check_result('mixed slice', 'loc', slice(2,4,2), 'ix', slice(2,4,2), typs = ['mixed'], axes=0, fails=TypeError) def test_loc_general(self): - # GH 2922 (these are fails) - df = DataFrame(np.random.rand(4,4),columns=['A','B','C','D']) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,2),slice(0,2)])) - df = DataFrame(np.random.rand(4,4),columns=['A','B','C','D'], index=['A','B','C','D']) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,2),df.columns[0:2]])) # want this to work result = df.loc[:,"A":"B"].iloc[0:2,:] @@ -3239,10 +3307,10 @@ def test_partial_set_invalid(self): # don't allow not string inserts def f(): df.loc[100.0, :] = df.ix[0] - self.assertRaises(ValueError, f) + 
self.assertRaises(TypeError, f) def f(): df.loc[100,:] = df.ix[0] - self.assertRaises(ValueError, f) + self.assertRaises(TypeError, f) def f(): df.ix[100.0, :] = df.ix[0] @@ -3887,8 +3955,8 @@ def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None): check_invalid(index()) check_invalid(Index(np.arange(5) * 2.5),loc=KeyError, ix=KeyError, getitem=KeyError) - def check_getitem(index): - + def check_index(index, error): + index = index() s = Series(np.arange(len(index)),index=index) # positional selection @@ -3898,22 +3966,26 @@ def check_getitem(index): result4 = s.iloc[5.0] # by value - self.assertRaises(KeyError, lambda : s.loc[5]) - self.assertRaises(KeyError, lambda : s.loc[5.0]) + self.assertRaises(error, lambda : s.loc[5]) + self.assertRaises(error, lambda : s.loc[5.0]) # this is fallback, so it works result5 = s.ix[5] result6 = s.ix[5.0] + self.assertEqual(result1, result2) self.assertEqual(result1, result3) self.assertEqual(result1, result4) self.assertEqual(result1, result5) self.assertEqual(result1, result6) - # all index types except float/int - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: - check_getitem(index()) + # string-like + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex ]: + check_index(index, KeyError) + + # datetimelike + for index in [ tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: + check_index(index, TypeError) # exact indexing when found on IntIndex s = Series(np.arange(10),dtype='int64') @@ -3932,6 +4004,12 @@ def check_getitem(index): def test_slice_indexer(self): + def check_iloc_compat(s): + # invalid type for iloc (but works with a warning) + self.assert_produces_warning(FutureWarning, lambda : s.iloc[6.0:8]) + self.assert_produces_warning(FutureWarning, lambda : s.iloc[6.0:8.0]) + self.assert_produces_warning(FutureWarning, lambda : s.iloc[6:8.0]) + def check_slicing_positional(index): s = Series(np.arange(len(index))+10,index=index) @@ -3943,8 
+4021,8 @@ def check_slicing_positional(index): assert_series_equal(result1, result2) assert_series_equal(result1, result3) - # not in the index - self.assertRaises(KeyError, lambda : s.loc[2:5]) + # loc will fail + self.assertRaises(TypeError, lambda : s.loc[2:5]) # make all float slicing fail self.assertRaises(TypeError, lambda : s[2.0:5]) @@ -3955,91 +4033,83 @@ def check_slicing_positional(index): self.assertRaises(TypeError, lambda : s.ix[2.0:5.0]) self.assertRaises(TypeError, lambda : s.ix[2:5.0]) - self.assertRaises(KeyError, lambda : s.loc[2.0:5]) - self.assertRaises(KeyError, lambda : s.loc[2.0:5.0]) - self.assertRaises(KeyError, lambda : s.loc[2:5.0]) + self.assertRaises(TypeError, lambda : s.loc[2.0:5]) + self.assertRaises(TypeError, lambda : s.loc[2.0:5.0]) + self.assertRaises(TypeError, lambda : s.loc[2:5.0]) - # these work for now - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) - #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + check_iloc_compat(s) # all index types except int, float for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: + tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: check_slicing_positional(index()) - # int + ############ + # IntIndex # + ############ index = tm.makeIntIndex() - s = Series(np.arange(len(index))+10,index) + s = Series(np.arange(len(index))+10,index+5) # this is positional result1 = s[2:5] result4 = s.iloc[2:5] assert_series_equal(result1, result4) - # these are all value based + # these are all label based result2 = s.ix[2:5] result3 = s.loc[2:5] - result4 = s.loc[2.0:5] - result5 = s.loc[2.0:5.0] - result6 = s.loc[2:5.0] assert_series_equal(result2, result3) - assert_series_equal(result2, result4) - assert_series_equal(result2, result5) - assert_series_equal(result2, result6) - # make all float slicing fail - self.assertRaises(TypeError, lambda : s[2.0:5]) - 
self.assertRaises(TypeError, lambda : s[2.0:5.0]) - self.assertRaises(TypeError, lambda : s[2:5.0]) + # float slicers on an int index + expected = Series([11,12,13],index=[6,7,8]) + for method in [lambda x: x.loc, lambda x: x.ix]: + result = method(s)[6.0:8.5] + assert_series_equal(result, expected) + + result = method(s)[5.5:8.5] + assert_series_equal(result, expected) + + result = method(s)[5.5:8.0] + assert_series_equal(result, expected) - self.assertRaises(TypeError, lambda : s.ix[2.0:5]) - self.assertRaises(TypeError, lambda : s.ix[2.0:5.0]) - self.assertRaises(TypeError, lambda : s.ix[2:5.0]) + # make all float slicing fail for [] with an int index + self.assertRaises(TypeError, lambda : s[6.0:8]) + self.assertRaises(TypeError, lambda : s[6.0:8.0]) + self.assertRaises(TypeError, lambda : s[6:8.0]) - # these work for now - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) - #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + check_iloc_compat(s) - # float - index = tm.makeFloatIndex() - s = Series(np.arange(len(index))+10,index=index) + ############## + # FloatIndex # + ############## + s.index = s.index.astype('float64') # these are all value based - result1 = s[2:5] - result2 = s.ix[2:5] - result3 = s.loc[2:5] + result1 = s[6:8] + result2 = s.ix[6:8] + result3 = s.loc[6:8] assert_series_equal(result1, result2) assert_series_equal(result1, result3) - # these are all valid - result1a = s[2.0:5] - result2a = s[2.0:5.0] - result3a = s[2:5.0] - assert_series_equal(result1a, result2a) - assert_series_equal(result1a, result3a) - - result1b = s.ix[2.0:5] - result2b = s.ix[2.0:5.0] - result3b = s.ix[2:5.0] - assert_series_equal(result1b, result2b) - assert_series_equal(result1b, result3b) - - result1c = s.loc[2.0:5] - result2c = s.loc[2.0:5.0] - result3c = s.loc[2:5.0] - assert_series_equal(result1c, result2c) - assert_series_equal(result1c, result3c) - - assert_series_equal(result1a, result1b) - 
assert_series_equal(result1a, result1c) - - # these work for now - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5]) - #self.assertRaises(TypeError, lambda : s.iloc[2.0:5.0]) - #self.assertRaises(TypeError, lambda : s.iloc[2:5.0]) + # these are valid for all methods + # these are treated like labels (e.g. the rhs IS included) + def compare(slicers, expected): + for method in [lambda x: x, lambda x: x.loc, lambda x: x.ix ]: + for slices in slicers: + + result = method(s)[slices] + assert_series_equal(result, expected) + + compare([slice(6.0,8),slice(6.0,8.0),slice(6,8.0)], + s[(s.index>=6.0)&(s.index<=8)]) + compare([slice(6.5,8),slice(6.5,8.5)], + s[(s.index>=6.5)&(s.index<=8.5)]) + compare([slice(6,8.5)], + s[(s.index>=6.0)&(s.index<=8.5)]) + compare([slice(6.5,6.5)], + s[(s.index>=6.5)&(s.index<=6.5)]) + + check_iloc_compat(s) def test_set_ix_out_of_bounds_axis_0(self): df = pd.DataFrame(randn(2, 5), index=["row%s" % i for i in range(2)], columns=["col%s" % i for i in range(5)]) @@ -4097,9 +4167,7 @@ def test_deprecate_float_indexers(self): import warnings warnings.filterwarnings(action='error', category=FutureWarning) - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: - + def check_index(index): i = index(5) for s in [ Series(np.arange(len(i)),index=i), DataFrame(np.random.randn(len(i),len(i)),index=i,columns=i) ]: @@ -4114,8 +4182,11 @@ def f(): # fallsback to position selection ,series only s = Series(np.arange(len(i)),index=i) s[3] - self.assertRaises(FutureWarning, lambda : - s[3.0]) + self.assertRaises(FutureWarning, lambda : s[3.0]) + + for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: + check_index(index) # ints i = index(5) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index a1904d38ab530..048a9ff4b93a6 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -8,6 +8,7 @@ from pandas import compat import 
numpy as np from pandas.core import common as com +from pandas.core.common import is_integer, is_float import pandas.tslib as tslib import pandas.lib as lib from pandas.core.index import Index @@ -297,6 +298,21 @@ def resolution(self): from pandas.tseries.frequencies import get_reso_string return get_reso_string(self._resolution) + def _convert_scalar_indexer(self, key, kind=None): + """ + we don't allow integer or float indexing on datetime-like when using loc + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + """ + + if kind in ['loc'] and lib.isscalar(key) and (is_integer(key) or is_float(key)): + self._invalid_indexer('index',key) + + return super(DatetimeIndexOpsMixin, self)._convert_scalar_indexer(key, kind=kind) + def _add_datelike(self, other): raise NotImplementedError diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 60281b6b875b9..24d12078fd7f0 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -10,7 +10,7 @@ from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE, _values_from_object, _maybe_box, - ABCSeries) + ABCSeries, is_integer, is_float) from pandas.core.index import Index, Int64Index, Float64Index import pandas.compat as compat from pandas.compat import u @@ -215,9 +215,9 @@ def __new__(cls, data=None, freq = None if periods is not None: - if com.is_float(periods): + if is_float(periods): periods = int(periods) - elif not com.is_integer(periods): + elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) @@ -1262,7 +1262,7 @@ def get_loc(self, key, method=None): except (KeyError, ValueError): raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string, cast it to datetime according to resolution. 
@@ -1270,16 +1270,19 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None + + Returns + ------- + label : object Notes ----- Value of `side` parameter should be validated in caller. """ - if isinstance(label, float): - raise TypeError('Cannot index datetime64 with float keys') - if isinstance(label, time): - raise KeyError('Cannot index datetime64 with time keys') + if is_float(label) or isinstance(label, time) or is_integer(label): + self._invalid_indexer('slice',label) if isinstance(label, compat.string_types): freq = getattr(self, 'freqstr', @@ -1298,7 +1301,7 @@ def _get_string_slice(self, key, use_lhs=True, use_rhs=True): use_rhs=use_rhs) return loc - def slice_indexer(self, start=None, end=None, step=None): + def slice_indexer(self, start=None, end=None, step=None, kind=None): """ Return indexer for specified label slice. Index.slice_indexer, customized to handle time slicing. @@ -1333,11 +1336,11 @@ def slice_indexer(self, start=None, end=None, step=None): (end is None or isinstance(end, compat.string_types))): mask = True if start is not None: - start_casted = self._maybe_cast_slice_bound(start, 'left') + start_casted = self._maybe_cast_slice_bound(start, 'left', kind) mask = start_casted <= self if end is not None: - end_casted = self._maybe_cast_slice_bound(end, 'right') + end_casted = self._maybe_cast_slice_bound(end, 'right', kind) mask = (self <= end_casted) & mask indexer = mask.nonzero()[0][::step] @@ -1556,7 +1559,7 @@ def delete(self, loc): new_dates = np.delete(self.asi8, loc) freq = None - if lib.is_integer(loc): + if is_integer(loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 074ed720991ce..1a2381441ab8d 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -22,7 +22,8 @@ import pandas.core.common as com from pandas.core.common import (isnull, _INT64_DTYPE, 
_maybe_box, - _values_from_object, ABCSeries) + _values_from_object, ABCSeries, + is_integer, is_float) from pandas import compat from pandas.lib import Timestamp, Timedelta import pandas.lib as lib @@ -166,9 +167,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, freq = frequencies.get_standard_freq(freq) if periods is not None: - if com.is_float(periods): + if is_float(periods): periods = int(periods) - elif not com.is_integer(periods): + elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) @@ -533,7 +534,7 @@ def get_loc(self, key, method=None): try: return self._engine.get_loc(key) except KeyError: - if com.is_integer(key): + if is_integer(key): raise try: @@ -548,7 +549,7 @@ def get_loc(self, key, method=None): except KeyError: raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string or a datetime, cast it to Period.ordinal according to resolution. 
@@ -557,6 +558,7 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None Returns ------- @@ -576,6 +578,8 @@ def _maybe_cast_slice_bound(self, label, side): return bounds[0 if side == 'left' else 1] except Exception: raise KeyError(label) + elif is_integer(label) or is_float(label): + self._invalid_indexer('slice',label) return label diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 897a28e8f5ea9..e01ff54feab57 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -7,7 +7,7 @@ from pandas.core.common import (ABCSeries, _TD_DTYPE, _INT64_DTYPE, is_timedelta64_dtype, _maybe_box, - _values_from_object, isnull) + _values_from_object, isnull, is_integer, is_float) from pandas.core.index import Index, Int64Index import pandas.compat as compat from pandas.compat import u @@ -156,9 +156,9 @@ def __new__(cls, data=None, unit=None, freq = None if periods is not None: - if com.is_float(periods): + if is_float(periods): periods = int(periods) - elif not com.is_integer(periods): + elif not is_integer(periods): raise ValueError('Periods must be a number, got %s' % str(periods)) @@ -675,7 +675,7 @@ def get_loc(self, key, method=None): except (KeyError, ValueError): raise KeyError(key) - def _maybe_cast_slice_bound(self, label, side): + def _maybe_cast_slice_bound(self, label, side, kind): """ If label is a string, cast it to timedelta according to resolution. 
@@ -684,10 +684,11 @@ def _maybe_cast_slice_bound(self, label, side): ---------- label : object side : {'left', 'right'} + kind : string / None Returns ------- - bound : Timedelta or object + label : object """ if isinstance(label, compat.string_types): @@ -698,12 +699,16 @@ def _maybe_cast_slice_bound(self, label, side): else: return (lbound + _resolution_map[parsed.resolution]() - Timedelta(1, 'ns')) + elif is_integer(label) or is_float(label): + self._invalid_indexer('slice',label) + return label def _get_string_slice(self, key, use_lhs=True, use_rhs=True): freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None)) - + if is_integer(key) or is_float(key): + self._invalid_indexer('slice',key) loc = self._partial_td_slice(key, freq, use_lhs=use_lhs, use_rhs=use_rhs) return loc @@ -866,7 +871,7 @@ def delete(self, loc): new_tds = np.delete(self.asi8, loc) freq = 'infer' - if lib.is_integer(loc): + if is_integer(loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: