diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b2e5d04247e81..c7dcccab00d95 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -630,7 +630,10 @@ def __setitem__(self, key, value): else: key = com.apply_if_callable(key, self.obj) indexer = self._get_setitem_indexer(key) - self._setitem_with_indexer(indexer, value) + self._has_valid_setitem_indexer(key) + + iloc = self if self.name == "iloc" else self.obj.iloc + iloc._setitem_with_indexer(indexer, value) def _validate_key(self, key, axis: int): """ @@ -698,681 +701,725 @@ def _convert_tuple(self, key, is_setter: bool = False): keyidx.append(idx) return tuple(keyidx) - def _setitem_with_indexer(self, indexer, value): - self._has_valid_setitem_indexer(indexer) - - # also has the side effect of consolidating in-place - from pandas import Series + def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): + # we have an axis0 multi-index, handle or raise + axis = self.axis or 0 + try: + # fast path for series or for tup devoid of slices + return self._get_label(tup, axis=axis) + except TypeError: + # slices are unhashable + pass + except KeyError as ek: + # raise KeyError if number of indexers match + # else IndexingError will be raised + if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: + raise ek - info_axis = self.obj._info_axis_number + return None - # maybe partial set - take_split_path = self.obj._is_mixed_type + def _getitem_lowerdim(self, tup: Tuple): - # if there is only one block/type, still have to take split path - # unless the block is one-dimensional or it can hold the value - if not take_split_path and self.obj._data.blocks: - (blk,) = self.obj._data.blocks - if 1 < blk.ndim: # in case of dict, keys are indices - val = list(value.values()) if isinstance(value, dict) else value - take_split_path = not blk._can_hold_element(val) + # we can directly get the axis result since the axis is specified + if self.axis is not None: + axis = 
self.obj._get_axis_number(self.axis) + return self._getitem_axis(tup, axis=axis) - # if we have any multi-indexes that have non-trivial slices - # (not null slices) then we must take the split path, xref - # GH 10360, GH 27841 - if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): - for i, ax in zip(indexer, self.obj.axes): - if isinstance(ax, ABCMultiIndex) and not ( - is_integer(i) or com.is_null_slice(i) - ): - take_split_path = True - break + # we may have a nested tuples indexer here + if self._is_nested_tuple_indexer(tup): + return self._getitem_nested_tuple(tup) - if isinstance(indexer, tuple): - nindexer = [] - for i, idx in enumerate(indexer): - if isinstance(idx, dict): + # we maybe be using a tuple to represent multiple dimensions here + ax0 = self.obj._get_axis(0) + # ...but iloc should handle the tuple as simple integer-location + # instead of checking it as multiindex representation (GH 13797) + if isinstance(ax0, ABCMultiIndex) and self.name != "iloc": + result = self._handle_lowerdim_multi_index_axis0(tup) + if result is not None: + return result - # reindex the axis to the new value - # and set inplace - key, _ = convert_missing_indexer(idx) + if len(tup) > self.ndim: + raise IndexingError("Too many indexers. 
handle elsewhere") - # if this is the items axes, then take the main missing - # path first - # this correctly sets the dtype and avoids cache issues - # essentially this separates out the block that is needed - # to possibly be modified - if self.ndim > 1 and i == self.obj._info_axis_number: + for i, key in enumerate(tup): + if is_label_like(key) or isinstance(key, tuple): + section = self._getitem_axis(key, axis=i) - # add the new item, and set the value - # must have all defined axes if we have a scalar - # or a list-like on the non-info axes if we have a - # list-like - len_non_info_axes = ( - len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i - ) - if any(not l for l in len_non_info_axes): - if not is_list_like_indexer(value): - raise ValueError( - "cannot set a frame with no " - "defined index and a scalar" - ) - self.obj[key] = value - return + # we have yielded a scalar ? + if not is_list_like_indexer(section): + return section - # add a new item with the dtype setup - self.obj[key] = _infer_fill_value(value) + elif section.ndim == self.ndim: + # we're in the middle of slicing through a MultiIndex + # revise the key wrt to `section` by inserting an _NS + new_key = tup[:i] + (_NS,) + tup[i + 1 :] - new_indexer = convert_from_missing_indexer_tuple( - indexer, self.obj.axes - ) - self._setitem_with_indexer(new_indexer, value) + else: + new_key = tup[:i] + tup[i + 1 :] - return + # unfortunately need an odious kludge here because of + # DataFrame transposing convention + if ( + isinstance(section, ABCDataFrame) + and i > 0 + and len(new_key) == 2 + ): + a, b = new_key + new_key = b, a - # reindex the axis - # make sure to clear the cache because we are - # just replacing the block manager here - # so the object is the same - index = self.obj._get_axis(i) - labels = index.insert(len(index), key) - self.obj._data = self.obj.reindex(labels, axis=i)._data - self.obj._maybe_update_cacher(clear=True) - self.obj._is_copy = None + if len(new_key) == 1: + 
new_key = new_key[0] - nindexer.append(labels.get_loc(key)) + # Slices should return views, but calling iloc/loc with a null + # slice returns a new object. + if com.is_null_slice(new_key): + return section + # This is an elided recursive call to iloc/loc/etc' + return getattr(section, self.name)[new_key] - else: - nindexer.append(idx) + raise IndexingError("not applicable") - indexer = tuple(nindexer) - else: + def _getitem_nested_tuple(self, tup: Tuple): + # we have a nested tuple so have at least 1 multi-index level + # we should be able to match up the dimensionality here - indexer, missing = convert_missing_indexer(indexer) + # we have too many indexers for our dim, but have at least 1 + # multi-index dimension, try to see if we have something like + # a tuple passed to a series with a multi-index + if len(tup) > self.ndim: + result = self._handle_lowerdim_multi_index_axis0(tup) + if result is not None: + return result - if missing: - self._setitem_with_indexer_missing(indexer, value) - return + # this is a series with a multi-index specified a tuple of + # selectors + axis = self.axis or 0 + return self._getitem_axis(tup, axis=axis) - # set - item_labels = self.obj._get_axis(info_axis) + # handle the multi-axis by taking sections and reducing + # this is iterative + obj = self.obj + axis = 0 + for i, key in enumerate(tup): - # align and set the values - if take_split_path: - # Above we only set take_split_path to True for 2D cases - assert self.ndim == 2 - assert info_axis == 1 + if com.is_null_slice(key): + axis += 1 + continue - if not isinstance(indexer, tuple): - indexer = _tuplify(self.ndim, indexer) + current_ndim = obj.ndim + obj = getattr(obj, self.name)._getitem_axis(key, axis=axis) + axis += 1 - if isinstance(value, ABCSeries): - value = self._align_series(indexer, value) + # if we have a scalar, we are done + if is_scalar(obj) or not hasattr(obj, "ndim"): + break - info_idx = indexer[info_axis] - if is_integer(info_idx): - info_idx = [info_idx] - 
labels = item_labels[info_idx] + # has the dim of the obj changed? + # GH 7199 + if obj.ndim < current_ndim: + axis -= 1 - # if we have a partial multiindex, then need to adjust the plane - # indexer here - if len(labels) == 1 and isinstance( - self.obj[labels[0]].axes[0], ABCMultiIndex - ): - item = labels[0] - obj = self.obj[item] - index = obj.index - idx = indexer[:info_axis][0] + return obj - plane_indexer = tuple([idx]) + indexer[info_axis + 1 :] - lplane_indexer = length_of_indexer(plane_indexer[0], index) + def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): + raise AbstractMethodError(self) - # require that we are setting the right number of values that - # we are indexing - if ( - is_list_like_indexer(value) - and np.iterable(value) - and lplane_indexer != len(value) - ): + def __getitem__(self, key): + if type(key) is tuple: + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + if self._is_scalar_access(key): + try: + return self.obj._get_value(*key, takeable=self._takeable) + except (KeyError, IndexError, AttributeError): + # AttributeError for IntervalTree get_value + pass + return self._getitem_tuple(key) + else: + # we by definition only have the 0th axis + axis = self.axis or 0 - if len(obj[idx]) != len(value): - raise ValueError( - "cannot set using a multi-index " - "selection indexer with a different " - "length than the value" - ) + maybe_callable = com.apply_if_callable(key, self.obj) + return self._getitem_axis(maybe_callable, axis=axis) - # make sure we have an ndarray - value = getattr(value, "values", value).ravel() + def _is_scalar_access(self, key: Tuple): + raise NotImplementedError() - # we can directly set the series here - obj._consolidate_inplace() - obj = obj.copy() - obj._data = obj._data.setitem(indexer=tuple([idx]), value=value) - self.obj[item] = obj - return + def _getitem_tuple(self, tup: Tuple): + raise AbstractMethodError(self) - # non-mi - else: - plane_indexer = indexer[:info_axis] + 
indexer[info_axis + 1 :] - plane_axis = self.obj.axes[:info_axis][0] - lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) + def _getitem_axis(self, key, axis: int): + raise NotImplementedError() - def setter(item, v): - s = self.obj[item] - pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer + def _has_valid_setitem_indexer(self, indexer) -> bool: + raise AbstractMethodError(self) - # perform the equivalent of a setitem on the info axis - # as we have a null slice or a slice with full bounds - # which means essentially reassign to the columns of a - # multi-dim object - # GH6149 (null slice), GH10408 (full bounds) - if isinstance(pi, tuple) and all( - com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj)) - for idx in pi - ): - s = v - else: - # set the item, possibly having a dtype change - s._consolidate_inplace() - s = s.copy() - s._data = s._data.setitem(indexer=pi, value=v) - s._maybe_update_cacher(clear=True) - - # reset the sliced object if unique - self.obj[item] = s - - # we need an iterable, with a ndim of at least 1 - # eg. don't pass through np.array(0) - if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: - - # we have an equal len Frame - if isinstance(value, ABCDataFrame): - sub_indexer = list(indexer) - multiindex_indexer = isinstance(labels, ABCMultiIndex) - - for item in labels: - if item in value: - sub_indexer[info_axis] = item - v = self._align_series( - tuple(sub_indexer), value[item], multiindex_indexer - ) - else: - v = np.nan + def _getbool_axis(self, key, axis: int): + # caller is responsible for ensuring non-None axis + labels = self.obj._get_axis(axis) + key = check_bool_indexer(labels, key) + inds = key.nonzero()[0] + return self.obj._take_with_is_copy(inds, axis=axis) - setter(item, v) - # we have an equal len ndarray/convertible to our labels - # hasattr first, to avoid coercing to ndarray without reason. - # But we may be relying on the ndarray coercion to check ndim. 
- # Why not just convert to an ndarray earlier on if needed? - elif np.ndim(value) == 2: +@Appender(IndexingMixin.loc.__doc__) +class _LocIndexer(_LocationIndexer): + _takeable: bool = False + _valid_types = ( + "labels (MUST BE IN THE INDEX), slices of labels (BOTH " + "endpoints included! Can be slices of integers if the " + "index is integers), listlike of labels, boolean" + ) - # note that this coerces the dtype if we are mixed - # GH 7551 - value = np.array(value, dtype=object) - if len(labels) != value.shape[1]: - raise ValueError( - "Must have equal len keys and value " - "when setting with an ndarray" - ) + # ------------------------------------------------------------------- + # Key Checks - for i, item in enumerate(labels): + @Appender(_LocationIndexer._validate_key.__doc__) + def _validate_key(self, key, axis: int): - # setting with a list, recoerces - setter(item, value[:, i].tolist()) + # valid for a collection of labels (we check their presence later) + # slice of labels (where start-end in labels) + # slice of integers (only if in the labels) + # boolean - # we have an equal len list/ndarray - elif _can_do_equal_len( - labels, value, plane_indexer, lplane_indexer, self.obj - ): - setter(labels[0], value) + if isinstance(key, slice): + return - # per label values - else: + if com.is_bool_indexer(key): + return - if len(labels) != len(value): - raise ValueError( - "Must have equal len keys and value " - "when setting with an iterable" - ) + if not is_list_like_indexer(key): + labels = self.obj._get_axis(axis) + labels._convert_scalar_indexer(key, kind="loc") - for item, v in zip(labels, value): - setter(item, v) - else: + def _has_valid_setitem_indexer(self, indexer) -> bool: + return True - # scalar - for item in labels: - setter(item, value) + def _is_scalar_access(self, key: Tuple) -> bool: + """ + Returns + ------- + bool + """ + # this is a shortcut accessor to both .loc and .iloc + # that provide the equivalent access of .at and .iat + # a) avoid 
getting things via sections and (to minimize dtype changes) + # b) provide a performant path + if len(key) != self.ndim: + return False - else: - if isinstance(indexer, tuple): - indexer = maybe_convert_ix(*indexer) + for i, k in enumerate(key): + if not is_scalar(k): + return False - # if we are setting on the info axis ONLY - # set using those methods to avoid block-splitting - # logic here - if ( - len(indexer) > info_axis - and is_integer(indexer[info_axis]) - and all( - com.is_null_slice(idx) - for i, idx in enumerate(indexer) - if i != info_axis - ) - and item_labels.is_unique - ): - self.obj[item_labels[indexer[info_axis]]] = value - return + ax = self.obj.axes[i] + if isinstance(ax, ABCMultiIndex): + return False - if isinstance(value, (ABCSeries, dict)): - # TODO(EA): ExtensionBlock.setitem this causes issues with - # setting for extensionarrays that store dicts. Need to decide - # if it's worth supporting that. - value = self._align_series(indexer, Series(value)) + if isinstance(k, str) and ax._supports_partial_string_indexing: + # partial string indexing, df.loc['2000', 'A'] + # should not be considered scalar + return False - elif isinstance(value, ABCDataFrame): - value = self._align_frame(indexer, value) + if not ax.is_unique: + return False - # check for chained assignment - self.obj._check_is_chained_assignment_possible() + return True - # actually do the set - self.obj._consolidate_inplace() - self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) - self.obj._maybe_update_cacher(clear=True) + # ------------------------------------------------------------------- + # MultiIndex Handling - def _setitem_with_indexer_missing(self, indexer, value): - """ - Insert new row(s) or column(s) into the Series or DataFrame. 
+ def _multi_take_opportunity(self, tup: Tuple) -> bool: """ - from pandas import Series - - # reindex the axis to the new value - # and set inplace - if self.ndim == 1: - index = self.obj.index - new_index = index.insert(len(index), indexer) - - # we have a coerced indexer, e.g. a float - # that matches in an Int64Index, so - # we will not create a duplicate index, rather - # index to that element - # e.g. 0.0 -> 0 - # GH#12246 - if index.is_unique: - new_indexer = index.get_indexer([new_index[-1]]) - if (new_indexer != -1).any(): - return self._setitem_with_indexer(new_indexer, value) - - # this preserves dtype of the value - new_values = Series([value])._values - if len(self.obj._values): - # GH#22717 handle casting compatibility that np.concatenate - # does incorrectly - new_values = concat_compat([self.obj._values, new_values]) - self.obj._data = self.obj._constructor( - new_values, index=new_index, name=self.obj.name - )._data - self.obj._maybe_update_cacher(clear=True) - - elif self.ndim == 2: + Check whether there is the possibility to use ``_multi_take``. - if not len(self.obj.columns): - # no columns and scalar - raise ValueError("cannot set a frame with no defined columns") + Currently the limit is that all axes being indexed, must be indexed with + list-likes. - if isinstance(value, ABCSeries): - # append a Series - value = value.reindex(index=self.obj.columns, copy=True) - value.name = indexer + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis. - else: - # a list-list - if is_list_like_indexer(value): - # must have conforming columns - if len(value) != len(self.obj.columns): - raise ValueError("cannot set a row with mismatched columns") + Returns + ------- + bool + Whether the current indexing, + can be passed through `_multi_take`. 
+ """ + if not all(is_list_like_indexer(x) for x in tup): + return False - value = Series(value, index=self.obj.columns, name=indexer) + # just too complicated + if any(com.is_bool_indexer(x) for x in tup): + return False - self.obj._data = self.obj.append(value)._data - self.obj._maybe_update_cacher(clear=True) + return True - def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer: bool = False): + def _multi_take(self, tup: Tuple): """ + Create the indexers for the passed tuple of keys, and + executes the take operation. This allows the take operation to be + executed all at once, rather than once for each dimension. + Improving efficiency. + Parameters ---------- - indexer : tuple, slice, scalar - Indexer used to get the locations that will be set to `ser`. - ser : pd.Series - Values to assign to the locations specified by `indexer`. - multiindex_indexer : boolean, optional - Defaults to False. Should be set to True if `indexer` was from - a `pd.MultiIndex`, to avoid unnecessary broadcasting. + tup : tuple + Tuple of indexers, one per axis. 
Returns ------- - `np.array` of `ser` broadcast to the appropriate shape for assignment - to the locations selected by `indexer` + values: same type as the object being indexed """ - if isinstance(indexer, (slice, np.ndarray, list, Index)): - indexer = tuple([indexer]) + # GH 836 + d = { + axis: self._get_listlike_indexer(key, axis) + for (key, axis) in zip(tup, self.obj._AXIS_ORDERS) + } + return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True) - if isinstance(indexer, tuple): - - # flatten np.ndarray indexers - def ravel(i): - return i.ravel() if isinstance(i, np.ndarray) else i - - indexer = tuple(map(ravel, indexer)) - - aligners = [not com.is_null_slice(idx) for idx in indexer] - sum_aligners = sum(aligners) - single_aligner = sum_aligners == 1 - is_frame = self.ndim == 2 - obj = self.obj + # ------------------------------------------------------------------- - # are we a single alignable value on a non-primary - # dim (e.g. panel: 1,2, or frame: 0) ? - # hence need to align to a single axis dimension - # rather that find all valid dims + def _get_partial_string_timestamp_match_key(self, key, labels): + """ + Translate any partial string timestamp matches in key, returning the + new key. - # frame - if is_frame: - single_aligner = single_aligner and aligners[0] + (GH 10331) + """ + if isinstance(labels, ABCMultiIndex): + if ( + isinstance(key, str) + and labels.levels[0]._supports_partial_string_indexing + ): + # Convert key '2016-01-01' to + # ('2016-01-01'[, slice(None, None, None)]+) + key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) - # we have a frame, with multiple indexers on both axes; and a - # series, so need to broadcast (see GH5206) - if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer): - ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values + if isinstance(key, tuple): + # Convert (..., '2016-01-01', ...) in tuple to + # (..., slice('2016-01-01', '2016-01-01', None), ...) 
+ new_key = [] + for i, component in enumerate(key): + if ( + isinstance(component, str) + and labels.levels[i]._supports_partial_string_indexing + ): + new_key.append(slice(component, component, None)) + else: + new_key.append(component) + key = tuple(new_key) - # single indexer - if len(indexer) > 1 and not multiindex_indexer: - len_indexer = len(indexer[1]) - ser = np.tile(ser, len_indexer).reshape(len_indexer, -1).T + return key - return ser + def _getitem_iterable(self, key, axis: int): + """ + Index current object with an an iterable collection of keys. - for i, idx in enumerate(indexer): - ax = obj.axes[i] + Parameters + ---------- + key : iterable + Targeted labels. + axis: int + Dimension on which the indexing is being made. - # multiple aligners (or null slices) - if is_sequence(idx) or isinstance(idx, slice): - if single_aligner and com.is_null_slice(idx): - continue - new_ix = ax[idx] - if not is_list_like_indexer(new_ix): - new_ix = Index([new_ix]) - else: - new_ix = Index(new_ix) - if ser.index.equals(new_ix) or not len(new_ix): - return ser._values.copy() + Raises + ------ + KeyError + If no key was found. Will change in the future to raise if not all + keys were found. - return ser.reindex(new_ix)._values + Returns + ------- + scalar, DataFrame, or Series: indexed value(s). + """ + # we assume that not com.is_bool_indexer(key), as that is + # handled before we get here. 
+ self._validate_key(key, axis) - # 2 dims - elif single_aligner: + # A collection of keys + keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) + return self.obj._reindex_with_indexers( + {axis: [keyarr, indexer]}, copy=True, allow_dups=True + ) - # reindex along index - ax = self.obj.axes[1] - if ser.index.equals(ax) or not len(ax): - return ser._values.copy() - return ser.reindex(ax)._values + def _getitem_tuple(self, tup: Tuple): + try: + return self._getitem_lowerdim(tup) + except IndexingError: + pass - elif is_scalar(indexer): - ax = self.obj._get_axis(1) + # no multi-index, so validate all of the indexers + self._has_valid_tuple(tup) - if ser.index.equals(ax): - return ser._values.copy() + # ugly hack for GH #836 + if self._multi_take_opportunity(tup): + return self._multi_take(tup) - return ser.reindex(ax)._values + # no shortcut needed + retval = self.obj + for i, key in enumerate(tup): + if com.is_null_slice(key): + continue - raise ValueError("Incompatible indexer with Series") + retval = getattr(retval, self.name)._getitem_axis(key, axis=i) - def _align_frame(self, indexer, df: ABCDataFrame): - is_frame = self.ndim == 2 + return retval - if isinstance(indexer, tuple): + def _getitem_axis(self, key, axis: int): + key = item_from_zerodim(key) + if is_iterator(key): + key = list(key) - idx, cols = None, None - sindexers = [] - for i, ix in enumerate(indexer): - ax = self.obj.axes[i] - if is_sequence(ix) or isinstance(ix, slice): - if isinstance(ix, np.ndarray): - ix = ix.ravel() - if idx is None: - idx = ax[ix] - elif cols is None: - cols = ax[ix] - else: - break - else: - sindexers.append(i) + labels = self.obj._get_axis(axis) + key = self._get_partial_string_timestamp_match_key(key, labels) - if idx is not None and cols is not None: + if isinstance(key, slice): + self._validate_key(key, axis) + return self._get_slice_axis(key, axis=axis) + elif com.is_bool_indexer(key): + return self._getbool_axis(key, axis=axis) + elif 
is_list_like_indexer(key): - if df.index.equals(idx) and df.columns.equals(cols): - val = df.copy()._values - else: - val = df.reindex(idx, columns=cols)._values - return val + # convert various list-like indexers + # to a list of keys + # we will use the *values* of the object + # and NOT the index if its a PandasObject + if isinstance(labels, ABCMultiIndex): - elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame: - ax = self.obj.index[indexer] - if df.index.equals(ax): - val = df.copy()._values - else: + if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: + # Series, or 0,1 ndim ndarray + # GH 14730 + key = list(key) + elif isinstance(key, ABCDataFrame): + # GH 15438 + raise NotImplementedError( + "Indexing a MultiIndex with a " + "DataFrame key is not " + "implemented" + ) + elif hasattr(key, "ndim") and key.ndim > 1: + raise NotImplementedError( + "Indexing a MultiIndex with a " + "multidimensional key is not " + "implemented" + ) - # we have a multi-index and are trying to align - # with a particular, level GH3738 if ( - isinstance(ax, ABCMultiIndex) - and isinstance(df.index, ABCMultiIndex) - and ax.nlevels != df.index.nlevels + not isinstance(key, tuple) + and len(key) + and not isinstance(key[0], tuple) ): - raise TypeError( - "cannot align on a multi-index with out " - "specifying the join levels" - ) + key = tuple([key]) - val = df.reindex(index=ax)._values - return val + # an iterable multi-selection + if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)): - raise ValueError("Incompatible indexer with DataFrame") + if hasattr(key, "ndim") and key.ndim > 1: + raise ValueError("Cannot index with multidimensional key") - def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): - # we have an axis0 multi-index, handle or raise - axis = self.axis or 0 - try: - # fast path for series or for tup devoid of slices - return self._get_label(tup, axis=axis) - except TypeError: - # slices are unhashable - pass - 
except KeyError as ek: - # raise KeyError if number of indexers match - # else IndexingError will be raised - if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: - raise ek + return self._getitem_iterable(key, axis=axis) - return None + # nested tuple slicing + if is_nested_tuple(key, labels): + locs = labels.get_locs(key) + indexer = [slice(None)] * self.ndim + indexer[axis] = locs + return self.obj.iloc[tuple(indexer)] - def _getitem_lowerdim(self, tup: Tuple): + # fall thru to straight lookup + self._validate_key(key, axis) + return self._get_label(key, axis=axis) - # we can directly get the axis result since the axis is specified - if self.axis is not None: - axis = self.obj._get_axis_number(self.axis) - return self._getitem_axis(tup, axis=axis) + def _get_slice_axis(self, slice_obj: slice, axis: int): + """ + This is pretty simple as we just have to deal with labels. + """ + # caller is responsible for ensuring non-None axis + obj = self.obj + if not need_slice(slice_obj): + return obj.copy(deep=False) - # we may have a nested tuples indexer here - if self._is_nested_tuple_indexer(tup): - return self._getitem_nested_tuple(tup) + labels = obj._get_axis(axis) + indexer = labels.slice_indexer( + slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc" + ) - # we maybe be using a tuple to represent multiple dimensions here - ax0 = self.obj._get_axis(0) - # ...but iloc should handle the tuple as simple integer-location - # instead of checking it as multiindex representation (GH 13797) - if isinstance(ax0, ABCMultiIndex) and self.name != "iloc": - result = self._handle_lowerdim_multi_index_axis0(tup) - if result is not None: - return result + if isinstance(indexer, slice): + return self.obj._slice(indexer, axis=axis, kind="iloc") + else: + # DatetimeIndex overrides Index.slice_indexer and may + # return a DatetimeIndex instead of a slice object. 
+ return self.obj.take(indexer, axis=axis) - if len(tup) > self.ndim: - raise IndexingError("Too many indexers. handle elsewhere") + def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): + """ + Convert indexing key into something we can use to do actual fancy + indexing on a ndarray. - for i, key in enumerate(tup): - if is_label_like(key) or isinstance(key, tuple): - section = self._getitem_axis(key, axis=i) + Examples + ix[:5] -> slice(0, 5) + ix[[1,2,3]] -> [1,2,3] + ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz) - # we have yielded a scalar ? - if not is_list_like_indexer(section): - return section + Going by Zen of Python? + 'In the face of ambiguity, refuse the temptation to guess.' + raise AmbiguousIndexError with integer labels? + - No, prefer label-based indexing + """ + labels = self.obj._get_axis(axis) - elif section.ndim == self.ndim: - # we're in the middle of slicing through a MultiIndex - # revise the key wrt to `section` by inserting an _NS - new_key = tup[:i] + (_NS,) + tup[i + 1 :] + if isinstance(key, slice): + return labels._convert_slice_indexer(key, kind="loc") - else: - new_key = tup[:i] + tup[i + 1 :] + if is_scalar(key): + # try to find out correct indexer, if not type correct raise + try: + key = labels._convert_scalar_indexer(key, kind="loc") + except TypeError: + # but we will allow setting + if not is_setter: + raise - # unfortunately need an odious kludge here because of - # DataFrame transposing convention - if ( - isinstance(section, ABCDataFrame) - and i > 0 - and len(new_key) == 2 - ): - a, b = new_key - new_key = b, a + # see if we are positional in nature + is_int_index = labels.is_integer() + is_int_positional = is_integer(key) and not is_int_index - if len(new_key) == 1: - new_key = new_key[0] + if is_scalar(key) or isinstance(labels, ABCMultiIndex): + # Otherwise get_loc will raise InvalidIndexError - # Slices should return views, but calling iloc/loc with a null - # slice returns a new 
object. - if com.is_null_slice(new_key): - return section - # This is an elided recursive call to iloc/loc/etc' - return getattr(section, self.name)[new_key] + # if we are a label return me + try: + return labels.get_loc(key) + except LookupError: + if isinstance(key, tuple) and isinstance(labels, ABCMultiIndex): + if len(key) == labels.nlevels: + return {"key": key} + raise + except TypeError: + pass + except ValueError: + if not is_int_positional: + raise - raise IndexingError("not applicable") + # a positional + if is_int_positional: - def _getitem_nested_tuple(self, tup: Tuple): - # we have a nested tuple so have at least 1 multi-index level - # we should be able to match up the dimensionality here + # if we are setting and its not a valid location + # its an insert which fails by definition - # we have too many indexers for our dim, but have at least 1 - # multi-index dimension, try to see if we have something like - # a tuple passed to a series with a multi-index - if len(tup) > self.ndim: - result = self._handle_lowerdim_multi_index_axis0(tup) - if result is not None: - return result + # always valid + return {"key": key} - # this is a series with a multi-index specified a tuple of - # selectors - axis = self.axis or 0 - return self._getitem_axis(tup, axis=axis) + if is_nested_tuple(key, labels): + return labels.get_locs(key) - # handle the multi-axis by taking sections and reducing - # this is iterative - obj = self.obj - axis = 0 - for i, key in enumerate(tup): + elif is_list_like_indexer(key): - if com.is_null_slice(key): - axis += 1 - continue + if com.is_bool_indexer(key): + key = check_bool_indexer(labels, key) + (inds,) = key.nonzero() + return inds + else: + # When setting, missing keys are not allowed, even with .loc: + return self._get_listlike_indexer(key, axis, raise_missing=True)[1] + else: + try: + return labels.get_loc(key) + except LookupError: + # allow a not found key only if we are a setter + if not is_list_like_indexer(key): + return 
{"key": key} + raise - current_ndim = obj.ndim - obj = getattr(obj, self.name)._getitem_axis(key, axis=axis) - axis += 1 + def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): + """ + Transform a list-like of keys into a new index and an indexer. - # if we have a scalar, we are done - if is_scalar(obj) or not hasattr(obj, "ndim"): - break + Parameters + ---------- + key : list-like + Targeted labels. + axis: int + Dimension on which the indexing is being made. + raise_missing: bool, default False + Whether to raise a KeyError if some labels were not found. + Will be removed in the future, and then this method will always behave as + if ``raise_missing=True``. - # has the dim of the obj changed? - # GH 7199 - if obj.ndim < current_ndim: - axis -= 1 + Raises + ------ + KeyError + If at least one key was requested but none was found, and + raise_missing=True. - return obj + Returns + ------- + keyarr: Index + New index (coinciding with 'key' if the axis is unique). + values : array-like + Indexer for the return object, -1 denotes keys not found. 
+ """ + ax = self.obj._get_axis(axis) - def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): - raise AbstractMethodError(self) + # Have the index compute an indexer or return None + # if it cannot handle: + indexer, keyarr = ax._convert_listlike_indexer(key) + # We only act on all found values: + if indexer is not None and (indexer != -1).all(): + self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing) + return ax[indexer], indexer - def __getitem__(self, key): - if type(key) is tuple: - key = tuple(com.apply_if_callable(x, self.obj) for x in key) - if self._is_scalar_access(key): - try: - return self.obj._get_value(*key, takeable=self._takeable) - except (KeyError, IndexError, AttributeError): - # AttributeError for IntervalTree get_value - pass - return self._getitem_tuple(key) + if ax.is_unique and not getattr(ax, "is_overlapping", False): + indexer = ax.get_indexer_for(key) + keyarr = ax.reindex(keyarr)[0] else: - # we by definition only have the 0th axis - axis = self.axis or 0 - - maybe_callable = com.apply_if_callable(key, self.obj) - return self._getitem_axis(maybe_callable, axis=axis) + keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) - def _is_scalar_access(self, key: Tuple): - raise NotImplementedError() + self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) + return keyarr, indexer - def _getitem_tuple(self, tup: Tuple): - raise AbstractMethodError(self) + def _validate_read_indexer( + self, key, indexer, axis: int, raise_missing: bool = False + ): + """ + Check that indexer can be used to return a result. - def _getitem_axis(self, key, axis: int): - raise NotImplementedError() + e.g. at least one element was found, + unless the list of keys was actually empty. - def _has_valid_setitem_indexer(self, indexer) -> bool: - raise AbstractMethodError(self) + Parameters + ---------- + key : list-like + Targeted labels (only used to show correct error message). 
+ indexer: array-like of booleans + Indices corresponding to the key, + (with -1 indicating not found). + axis: int + Dimension on which the indexing is being made. + raise_missing: bool + Whether to raise a KeyError if some labels are not found. Will be + removed in the future, and then this method will always behave as + if raise_missing=True. - def _getbool_axis(self, key, axis: int): - # caller is responsible for ensuring non-None axis - labels = self.obj._get_axis(axis) - key = check_bool_indexer(labels, key) - inds = key.nonzero()[0] - return self.obj._take_with_is_copy(inds, axis=axis) + Raises + ------ + KeyError + If at least one key was requested but none was found, and + raise_missing=True. + """ + ax = self.obj._get_axis(axis) + if len(key) == 0: + return -@Appender(IndexingMixin.loc.__doc__) -class _LocIndexer(_LocationIndexer): - _takeable: bool = False - _valid_types = ( - "labels (MUST BE IN THE INDEX), slices of labels (BOTH " - "endpoints included! Can be slices of integers if the " - "index is integers), listlike of labels, boolean" - ) + # Count missing values: + missing = (indexer < 0).sum() - # ------------------------------------------------------------------- - # Key Checks + if missing: + if missing == len(indexer): + axis_name = self.obj._get_axis_name(axis) + raise KeyError(f"None of [{key}] are in the [{axis_name}]") - @Appender(_LocationIndexer._validate_key.__doc__) - def _validate_key(self, key, axis: int): + # We (temporarily) allow for some missing keys with .loc, except in + # some cases (e.g. 
setting) in which "raise_missing" will be False + if not (self.name == "loc" and not raise_missing): + not_found = list(set(key) - set(ax)) + raise KeyError(f"{not_found} not in index") - # valid for a collection of labels (we check their presence later) - # slice of labels (where start-end in labels) - # slice of integers (only if in the labels) - # boolean + # we skip the warning on Categorical/Interval + # as this check is actually done (check for + # non-missing values), but a bit later in the + # code, so we want to avoid warning & then + # just raising + if not (ax.is_categorical() or ax.is_interval()): + raise KeyError( + "Passing list-likes to .loc or [] with any missing labels " + "is no longer supported, see " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 + ) - if isinstance(key, slice): - return +@Appender(IndexingMixin.iloc.__doc__) +class _iLocIndexer(_LocationIndexer): + _valid_types = ( + "integer, integer slice (START point is INCLUDED, END " + "point is EXCLUDED), listlike of integers, boolean array" + ) + _takeable = True + + # ------------------------------------------------------------------- + # Key Checks + + def _validate_key(self, key, axis: int): if com.is_bool_indexer(key): + if hasattr(key, "index") and isinstance(key.index, Index): + if key.index.inferred_type == "integer": + raise NotImplementedError( + "iLocation based boolean " + "indexing on an integer type " + "is not available" + ) + raise ValueError( + "iLocation based boolean indexing cannot use " + "an indexable as a mask" + ) return - if not is_list_like_indexer(key): - labels = self.obj._get_axis(axis) - labels._convert_scalar_indexer(key, kind="loc") + if isinstance(key, slice): + return + elif is_integer(key): + self._validate_integer(key, axis) + elif isinstance(key, tuple): + # a tuple should already have been caught by this point + # so don't treat a tuple as a valid indexer + raise IndexingError("Too 
many indexers") + elif is_list_like_indexer(key): + arr = np.array(key) + len_axis = len(self.obj._get_axis(axis)) + + # check that the key has a numeric dtype + if not is_numeric_dtype(arr.dtype): + raise IndexError(f".iloc requires numeric indexers, got {arr}") + + # check that the key does not exceed the maximum size of the index + if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis): + raise IndexError("positional indexers are out-of-bounds") + else: + raise ValueError(f"Can only index by location with a [{self._valid_types}]") + + def _has_valid_setitem_indexer(self, indexer): + self._has_valid_positional_setitem_indexer(indexer) + + def _has_valid_positional_setitem_indexer(self, indexer) -> bool: + """ + Validate that a positional indexer cannot enlarge its target + will raise if needed, does not modify the indexer externally. + + Returns + ------- + bool + """ + if isinstance(indexer, dict): + raise IndexError(f"{self.name} cannot enlarge its target object") + else: + if not isinstance(indexer, tuple): + indexer = _tuplify(self.ndim, indexer) + for ax, i in zip(self.obj.axes, indexer): + if isinstance(i, slice): + # should check the stop slice? + pass + elif is_list_like_indexer(i): + # should check the elements? 
+ pass + elif is_integer(i): + if i >= len(ax): + raise IndexError( + f"{self.name} cannot enlarge its target object" + ) + elif isinstance(i, dict): + raise IndexError(f"{self.name} cannot enlarge its target object") - def _has_valid_setitem_indexer(self, indexer) -> bool: return True def _is_scalar_access(self, key: Tuple) -> bool: @@ -1389,670 +1436,627 @@ def _is_scalar_access(self, key: Tuple) -> bool: return False for i, k in enumerate(key): - if not is_scalar(k): + if not is_integer(k): return False ax = self.obj.axes[i] - if isinstance(ax, ABCMultiIndex): - return False - - if isinstance(k, str) and ax._supports_partial_string_indexing: - # partial string indexing, df.loc['2000', 'A'] - # should not be considered scalar - return False - if not ax.is_unique: return False return True - # ------------------------------------------------------------------- - # MultiIndex Handling - - def _multi_take_opportunity(self, tup: Tuple) -> bool: + def _validate_integer(self, key: int, axis: int) -> None: """ - Check whether there is the possibility to use ``_multi_take``. - - Currently the limit is that all axes being indexed, must be indexed with - list-likes. + Check that 'key' is a valid position in the desired axis. Parameters ---------- - tup : tuple - Tuple of indexers, one per axis. + key : int + Requested position. + axis : int + Desired axis. - Returns - ------- - bool - Whether the current indexing, - can be passed through `_multi_take`. + Raises + ------ + IndexError + If 'key' is not a valid position in axis 'axis'. 
""" - if not all(is_list_like_indexer(x) for x in tup): - return False - - # just too complicated - if any(com.is_bool_indexer(x) for x in tup): - return False - - return True + len_axis = len(self.obj._get_axis(axis)) + if key >= len_axis or key < -len_axis: + raise IndexError("single positional indexer is out-of-bounds") - def _multi_take(self, tup: Tuple): - """ - Create the indexers for the passed tuple of keys, and - executes the take operation. This allows the take operation to be - executed all at once, rather than once for each dimension. - Improving efficiency. + # ------------------------------------------------------------------- - Parameters - ---------- - tup : tuple - Tuple of indexers, one per axis. + def _getitem_tuple(self, tup: Tuple): - Returns - ------- - values: same type as the object being indexed - """ - # GH 836 - d = { - axis: self._get_listlike_indexer(key, axis) - for (key, axis) in zip(tup, self.obj._AXIS_ORDERS) - } - return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True) + self._has_valid_tuple(tup) + try: + return self._getitem_lowerdim(tup) + except IndexingError: + pass - # ------------------------------------------------------------------- + retval = self.obj + axis = 0 + for i, key in enumerate(tup): + if com.is_null_slice(key): + axis += 1 + continue - def _get_partial_string_timestamp_match_key(self, key, labels): - """ - Translate any partial string timestamp matches in key, returning the - new key. 
+ retval = getattr(retval, self.name)._getitem_axis(key, axis=axis) - (GH 10331) - """ - if isinstance(labels, ABCMultiIndex): - if ( - isinstance(key, str) - and labels.levels[0]._supports_partial_string_indexing - ): - # Convert key '2016-01-01' to - # ('2016-01-01'[, slice(None, None, None)]+) - key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) + # if the dim was reduced, then pass a lower-dim the next time + if retval.ndim < self.ndim: + # TODO: this is never reached in tests; can we confirm that + # it is impossible? + axis -= 1 - if isinstance(key, tuple): - # Convert (..., '2016-01-01', ...) in tuple to - # (..., slice('2016-01-01', '2016-01-01', None), ...) - new_key = [] - for i, component in enumerate(key): - if ( - isinstance(component, str) - and labels.levels[i]._supports_partial_string_indexing - ): - new_key.append(slice(component, component, None)) - else: - new_key.append(component) - key = tuple(new_key) + # try to get for the next axis + axis += 1 - return key + return retval - def _getitem_iterable(self, key, axis: int): + def _get_list_axis(self, key, axis: int): """ - Index current object with an an iterable collection of keys. + Return Series values by list or array of integers. Parameters ---------- - key : iterable - Targeted labels. - axis: int - Dimension on which the indexing is being made. - - Raises - ------ - KeyError - If no key was found. Will change in the future to raise if not all - keys were found. + key : list-like positional indexer + axis : int Returns ------- - scalar, DataFrame, or Series: indexed value(s). + Series object + + Notes + ----- + `axis` can only be zero. """ - # we assume that not com.is_bool_indexer(key), as that is - # handled before we get here. 
- self._validate_key(key, axis) - - # A collection of keys - keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) - return self.obj._reindex_with_indexers( - {axis: [keyarr, indexer]}, copy=True, allow_dups=True - ) - - def _getitem_tuple(self, tup: Tuple): - try: - return self._getitem_lowerdim(tup) - except IndexingError: - pass - - # no multi-index, so validate all of the indexers - self._has_valid_tuple(tup) - - # ugly hack for GH #836 - if self._multi_take_opportunity(tup): - return self._multi_take(tup) - - # no shortcut needed - retval = self.obj - for i, key in enumerate(tup): - if com.is_null_slice(key): - continue - - retval = getattr(retval, self.name)._getitem_axis(key, axis=i) - - return retval + try: + return self.obj._take_with_is_copy(key, axis=axis) + except IndexError: + # re-raise with different error message + raise IndexError("positional indexers are out-of-bounds") def _getitem_axis(self, key, axis: int): - key = item_from_zerodim(key) - if is_iterator(key): - key = list(key) - - labels = self.obj._get_axis(axis) - key = self._get_partial_string_timestamp_match_key(key, labels) - if isinstance(key, slice): - self._validate_key(key, axis) return self._get_slice_axis(key, axis=axis) - elif com.is_bool_indexer(key): - return self._getbool_axis(key, axis=axis) - elif is_list_like_indexer(key): - - # convert various list-like indexers - # to a list of keys - # we will use the *values* of the object - # and NOT the index if its a PandasObject - if isinstance(labels, ABCMultiIndex): - - if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: - # Series, or 0,1 ndim ndarray - # GH 14730 - key = list(key) - elif isinstance(key, ABCDataFrame): - # GH 15438 - raise NotImplementedError( - "Indexing a MultiIndex with a " - "DataFrame key is not " - "implemented" - ) - elif hasattr(key, "ndim") and key.ndim > 1: - raise NotImplementedError( - "Indexing a MultiIndex with a " - "multidimensional key is not " - "implemented" - ) 
- if ( - not isinstance(key, tuple) - and len(key) - and not isinstance(key[0], tuple) - ): - key = tuple([key]) + if isinstance(key, list): + key = np.asarray(key) - # an iterable multi-selection - if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)): + if com.is_bool_indexer(key): + self._validate_key(key, axis) + return self._getbool_axis(key, axis=axis) - if hasattr(key, "ndim") and key.ndim > 1: - raise ValueError("Cannot index with multidimensional key") + # a list of integers + elif is_list_like_indexer(key): + return self._get_list_axis(key, axis=axis) - return self._getitem_iterable(key, axis=axis) + # a single integer + else: + key = item_from_zerodim(key) + if not is_integer(key): + raise TypeError("Cannot index by location index with a non-integer key") - # nested tuple slicing - if is_nested_tuple(key, labels): - locs = labels.get_locs(key) - indexer = [slice(None)] * self.ndim - indexer[axis] = locs - return self.obj.iloc[tuple(indexer)] + # validate the location + self._validate_integer(key, axis) - # fall thru to straight lookup - self._validate_key(key, axis) - return self._get_label(key, axis=axis) + return self.obj._ixs(key, axis=axis) def _get_slice_axis(self, slice_obj: slice, axis: int): - """ - This is pretty simple as we just have to deal with labels. - """ # caller is responsible for ensuring non-None axis obj = self.obj + if not need_slice(slice_obj): return obj.copy(deep=False) labels = obj._get_axis(axis) - indexer = labels.slice_indexer( - slice_obj.start, slice_obj.stop, slice_obj.step, kind="loc" - ) - - if isinstance(indexer, slice): - return self.obj._slice(indexer, axis=axis, kind="iloc") - else: - # DatetimeIndex overrides Index.slice_indexer and may - # return a DatetimeIndex instead of a slice object. 
- return self.obj.take(indexer, axis=axis) + labels._validate_positional_slice(slice_obj) + return self.obj._slice(slice_obj, axis=axis, kind="iloc") def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): """ - Convert indexing key into something we can use to do actual fancy - indexing on a ndarray. - - Examples - ix[:5] -> slice(0, 5) - ix[[1,2,3]] -> [1,2,3] - ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz) - - Going by Zen of Python? - 'In the face of ambiguity, refuse the temptation to guess.' - raise AmbiguousIndexError with integer labels? - - No, prefer label-based indexing + Much simpler as we only have to deal with our valid types. """ labels = self.obj._get_axis(axis) + # make need to convert a float key if isinstance(key, slice): - return labels._convert_slice_indexer(key, kind="loc") - - if is_scalar(key): - # try to find out correct indexer, if not type correct raise - try: - key = labels._convert_scalar_indexer(key, kind="loc") - except TypeError: - # but we will allow setting - if not is_setter: - raise - - # see if we are positional in nature - is_int_index = labels.is_integer() - is_int_positional = is_integer(key) and not is_int_index + labels._validate_positional_slice(key) + return key - if is_scalar(key) or isinstance(labels, ABCMultiIndex): - # Otherwise get_loc will raise InvalidIndexError + elif is_float(key): + labels._validate_indexer("positional", key, "iloc") + return key - # if we are a label return me - try: - return labels.get_loc(key) - except LookupError: - if isinstance(key, tuple) and isinstance(labels, ABCMultiIndex): - if len(key) == labels.nlevels: - return {"key": key} - raise - except TypeError: - pass - except ValueError: - if not is_int_positional: - raise + self._validate_key(key, axis) + return key - # a positional - if is_int_positional: + # ------------------------------------------------------------------- - # if we are setting and its not a valid location - # its an insert which 
fails by definition + def _setitem_with_indexer(self, indexer, value): - # always valid - return {"key": key} + # also has the side effect of consolidating in-place + from pandas import Series - if is_nested_tuple(key, labels): - return labels.get_locs(key) + info_axis = self.obj._info_axis_number - elif is_list_like_indexer(key): + # maybe partial set + take_split_path = self.obj._is_mixed_type - if com.is_bool_indexer(key): - key = check_bool_indexer(labels, key) - (inds,) = key.nonzero() - return inds - else: - # When setting, missing keys are not allowed, even with .loc: - return self._get_listlike_indexer(key, axis, raise_missing=True)[1] - else: - try: - return labels.get_loc(key) - except LookupError: - # allow a not found key only if we are a setter - if not is_list_like_indexer(key): - return {"key": key} - raise + # if there is only one block/type, still have to take split path + # unless the block is one-dimensional or it can hold the value + if not take_split_path and self.obj._data.blocks: + (blk,) = self.obj._data.blocks + if 1 < blk.ndim: # in case of dict, keys are indices + val = list(value.values()) if isinstance(value, dict) else value + take_split_path = not blk._can_hold_element(val) - def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): - """ - Transform a list-like of keys into a new index and an indexer. + # if we have any multi-indexes that have non-trivial slices + # (not null slices) then we must take the split path, xref + # GH 10360, GH 27841 + if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): + for i, ax in zip(indexer, self.obj.axes): + if isinstance(ax, ABCMultiIndex) and not ( + is_integer(i) or com.is_null_slice(i) + ): + take_split_path = True + break - Parameters - ---------- - key : list-like - Targeted labels. - axis: int - Dimension on which the indexing is being made. - raise_missing: bool, default False - Whether to raise a KeyError if some labels were not found. 
- Will be removed in the future, and then this method will always behave as - if ``raise_missing=True``. + if isinstance(indexer, tuple): + nindexer = [] + for i, idx in enumerate(indexer): + if isinstance(idx, dict): - Raises - ------ - KeyError - If at least one key was requested but none was found, and - raise_missing=True. + # reindex the axis to the new value + # and set inplace + key, _ = convert_missing_indexer(idx) - Returns - ------- - keyarr: Index - New index (coinciding with 'key' if the axis is unique). - values : array-like - Indexer for the return object, -1 denotes keys not found. - """ - ax = self.obj._get_axis(axis) + # if this is the items axes, then take the main missing + # path first + # this correctly sets the dtype and avoids cache issues + # essentially this separates out the block that is needed + # to possibly be modified + if self.ndim > 1 and i == self.obj._info_axis_number: - # Have the index compute an indexer or return None - # if it cannot handle: - indexer, keyarr = ax._convert_listlike_indexer(key) - # We only act on all found values: - if indexer is not None and (indexer != -1).all(): - self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing) - return ax[indexer], indexer + # add the new item, and set the value + # must have all defined axes if we have a scalar + # or a list-like on the non-info axes if we have a + # list-like + len_non_info_axes = ( + len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i + ) + if any(not l for l in len_non_info_axes): + if not is_list_like_indexer(value): + raise ValueError( + "cannot set a frame with no " + "defined index and a scalar" + ) + self.obj[key] = value + return - if ax.is_unique and not getattr(ax, "is_overlapping", False): - indexer = ax.get_indexer_for(key) - keyarr = ax.reindex(keyarr)[0] + # add a new item with the dtype setup + self.obj[key] = _infer_fill_value(value) + + new_indexer = convert_from_missing_indexer_tuple( + indexer, self.obj.axes + ) + 
self._setitem_with_indexer(new_indexer, value) + + return + + # reindex the axis + # make sure to clear the cache because we are + # just replacing the block manager here + # so the object is the same + index = self.obj._get_axis(i) + labels = index.insert(len(index), key) + self.obj._data = self.obj.reindex(labels, axis=i)._data + self.obj._maybe_update_cacher(clear=True) + self.obj._is_copy = None + + nindexer.append(labels.get_loc(key)) + + else: + nindexer.append(idx) + + indexer = tuple(nindexer) else: - keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) - self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing) - return keyarr, indexer + indexer, missing = convert_missing_indexer(indexer) - def _validate_read_indexer( - self, key, indexer, axis: int, raise_missing: bool = False - ): - """ - Check that indexer can be used to return a result. + if missing: + self._setitem_with_indexer_missing(indexer, value) + return - e.g. at least one element was found, - unless the list of keys was actually empty. + # set + item_labels = self.obj._get_axis(info_axis) - Parameters - ---------- - key : list-like - Targeted labels (only used to show correct error message). - indexer: array-like of booleans - Indices corresponding to the key, - (with -1 indicating not found). - axis: int - Dimension on which the indexing is being made. - raise_missing: bool - Whether to raise a KeyError if some labels are not found. Will be - removed in the future, and then this method will always behave as - if raise_missing=True. + # align and set the values + if take_split_path: + # Above we only set take_split_path to True for 2D cases + assert self.ndim == 2 + assert info_axis == 1 - Raises - ------ - KeyError - If at least one key was requested but none was found, and - raise_missing=True. 
- """ - ax = self.obj._get_axis(axis) + if not isinstance(indexer, tuple): + indexer = _tuplify(self.ndim, indexer) - if len(key) == 0: - return + if isinstance(value, ABCSeries): + value = self._align_series(indexer, value) - # Count missing values: - missing = (indexer < 0).sum() + info_idx = indexer[info_axis] + if is_integer(info_idx): + info_idx = [info_idx] + labels = item_labels[info_idx] - if missing: - if missing == len(indexer): - axis_name = self.obj._get_axis_name(axis) - raise KeyError(f"None of [{key}] are in the [{axis_name}]") + # if we have a partial multiindex, then need to adjust the plane + # indexer here + if len(labels) == 1 and isinstance( + self.obj[labels[0]].axes[0], ABCMultiIndex + ): + item = labels[0] + obj = self.obj[item] + index = obj.index + idx = indexer[:info_axis][0] - # We (temporarily) allow for some missing keys with .loc, except in - # some cases (e.g. setting) in which "raise_missing" will be False - if not (self.name == "loc" and not raise_missing): - not_found = list(set(key) - set(ax)) - raise KeyError(f"{not_found} not in index") + plane_indexer = tuple([idx]) + indexer[info_axis + 1 :] + lplane_indexer = length_of_indexer(plane_indexer[0], index) - # we skip the warning on Categorical/Interval - # as this check is actually done (check for - # non-missing values), but a bit later in the - # code, so we want to avoid warning & then - # just raising - if not (ax.is_categorical() or ax.is_interval()): - raise KeyError( - "Passing list-likes to .loc or [] with any missing labels " - "is no longer supported, see " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 - ) + # require that we are setting the right number of values that + # we are indexing + if ( + is_list_like_indexer(value) + and np.iterable(value) + and lplane_indexer != len(value) + ): + if len(obj[idx]) != len(value): + raise ValueError( + "cannot set using a multi-index " + "selection indexer 
with a different " + "length than the value" + ) -@Appender(IndexingMixin.iloc.__doc__) -class _iLocIndexer(_LocationIndexer): - _valid_types = ( - "integer, integer slice (START point is INCLUDED, END " - "point is EXCLUDED), listlike of integers, boolean array" - ) - _takeable = True + # make sure we have an ndarray + value = getattr(value, "values", value).ravel() - # ------------------------------------------------------------------- - # Key Checks + # we can directly set the series here + obj._consolidate_inplace() + obj = obj.copy() + obj._data = obj._data.setitem(indexer=tuple([idx]), value=value) + self.obj[item] = obj + return - def _validate_key(self, key, axis: int): - if com.is_bool_indexer(key): - if hasattr(key, "index") and isinstance(key.index, Index): - if key.index.inferred_type == "integer": - raise NotImplementedError( - "iLocation based boolean " - "indexing on an integer type " - "is not available" - ) - raise ValueError( - "iLocation based boolean indexing cannot use " - "an indexable as a mask" - ) - return + # non-mi + else: + plane_indexer = indexer[:info_axis] + indexer[info_axis + 1 :] + plane_axis = self.obj.axes[:info_axis][0] + lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) - if isinstance(key, slice): - return - elif is_integer(key): - self._validate_integer(key, axis) - elif isinstance(key, tuple): - # a tuple should already have been caught by this point - # so don't treat a tuple as a valid indexer - raise IndexingError("Too many indexers") - elif is_list_like_indexer(key): - arr = np.array(key) - len_axis = len(self.obj._get_axis(axis)) + def setter(item, v): + s = self.obj[item] + pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer - # check that the key has a numeric dtype - if not is_numeric_dtype(arr.dtype): - raise IndexError(f".iloc requires numeric indexers, got {arr}") + # perform the equivalent of a setitem on the info axis + # as we have a null slice or a slice with full bounds + # which 
means essentially reassign to the columns of a + # multi-dim object + # GH6149 (null slice), GH10408 (full bounds) + if isinstance(pi, tuple) and all( + com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj)) + for idx in pi + ): + s = v + else: + # set the item, possibly having a dtype change + s._consolidate_inplace() + s = s.copy() + s._data = s._data.setitem(indexer=pi, value=v) + s._maybe_update_cacher(clear=True) + + # reset the sliced object if unique + self.obj[item] = s + + # we need an iterable, with a ndim of at least 1 + # eg. don't pass through np.array(0) + if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: + + # we have an equal len Frame + if isinstance(value, ABCDataFrame): + sub_indexer = list(indexer) + multiindex_indexer = isinstance(labels, ABCMultiIndex) + + for item in labels: + if item in value: + sub_indexer[info_axis] = item + v = self._align_series( + tuple(sub_indexer), value[item], multiindex_indexer + ) + else: + v = np.nan + + setter(item, v) + + # we have an equal len ndarray/convertible to our labels + # hasattr first, to avoid coercing to ndarray without reason. + # But we may be relying on the ndarray coercion to check ndim. + # Why not just convert to an ndarray earlier on if needed? 
+ elif np.ndim(value) == 2: + + # note that this coerces the dtype if we are mixed + # GH 7551 + value = np.array(value, dtype=object) + if len(labels) != value.shape[1]: + raise ValueError( + "Must have equal len keys and value " + "when setting with an ndarray" + ) + + for i, item in enumerate(labels): + + # setting with a list, recoerces + setter(item, value[:, i].tolist()) + + # we have an equal len list/ndarray + elif _can_do_equal_len( + labels, value, plane_indexer, lplane_indexer, self.obj + ): + setter(labels[0], value) + + # per label values + else: + + if len(labels) != len(value): + raise ValueError( + "Must have equal len keys and value " + "when setting with an iterable" + ) + + for item, v in zip(labels, value): + setter(item, v) + else: + + # scalar + for item in labels: + setter(item, value) - # check that the key does not exceed the maximum size of the index - if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis): - raise IndexError("positional indexers are out-of-bounds") else: - raise ValueError(f"Can only index by location with a [{self._valid_types}]") + if isinstance(indexer, tuple): + indexer = maybe_convert_ix(*indexer) - def _has_valid_setitem_indexer(self, indexer): - self._has_valid_positional_setitem_indexer(indexer) + # if we are setting on the info axis ONLY + # set using those methods to avoid block-splitting + # logic here + if ( + len(indexer) > info_axis + and is_integer(indexer[info_axis]) + and all( + com.is_null_slice(idx) + for i, idx in enumerate(indexer) + if i != info_axis + ) + and item_labels.is_unique + ): + self.obj[item_labels[indexer[info_axis]]] = value + return - def _has_valid_positional_setitem_indexer(self, indexer) -> bool: - """ - Validate that a positional indexer cannot enlarge its target - will raise if needed, does not modify the indexer externally. 
+ if isinstance(value, (ABCSeries, dict)): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. + value = self._align_series(indexer, Series(value)) - Returns - ------- - bool + elif isinstance(value, ABCDataFrame): + value = self._align_frame(indexer, value) + + # check for chained assignment + self.obj._check_is_chained_assignment_possible() + + # actually do the set + self.obj._consolidate_inplace() + self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) + self.obj._maybe_update_cacher(clear=True) + + def _setitem_with_indexer_missing(self, indexer, value): """ - if isinstance(indexer, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") - else: - if not isinstance(indexer, tuple): - indexer = _tuplify(self.ndim, indexer) - for ax, i in zip(self.obj.axes, indexer): - if isinstance(i, slice): - # should check the stop slice? - pass - elif is_list_like_indexer(i): - # should check the elements? - pass - elif is_integer(i): - if i >= len(ax): - raise IndexError( - f"{self.name} cannot enlarge its target object" - ) - elif isinstance(i, dict): - raise IndexError(f"{self.name} cannot enlarge its target object") + Insert new row(s) or column(s) into the Series or DataFrame. + """ + from pandas import Series + + # reindex the axis to the new value + # and set inplace + if self.ndim == 1: + index = self.obj.index + new_index = index.insert(len(index), indexer) + + # we have a coerced indexer, e.g. a float + # that matches in an Int64Index, so + # we will not create a duplicate index, rather + # index to that element + # e.g. 
0.0 -> 0 + # GH#12246 + if index.is_unique: + new_indexer = index.get_indexer([new_index[-1]]) + if (new_indexer != -1).any(): + return self._setitem_with_indexer(new_indexer, value) - return True + # this preserves dtype of the value + new_values = Series([value])._values + if len(self.obj._values): + # GH#22717 handle casting compatibility that np.concatenate + # does incorrectly + new_values = concat_compat([self.obj._values, new_values]) + self.obj._data = self.obj._constructor( + new_values, index=new_index, name=self.obj.name + )._data + self.obj._maybe_update_cacher(clear=True) - def _is_scalar_access(self, key: Tuple) -> bool: - """ - Returns - ------- - bool - """ - # this is a shortcut accessor to both .loc and .iloc - # that provide the equivalent access of .at and .iat - # a) avoid getting things via sections and (to minimize dtype changes) - # b) provide a performant path - if len(key) != self.ndim: - return False + elif self.ndim == 2: - for i, k in enumerate(key): - if not is_integer(k): - return False + if not len(self.obj.columns): + # no columns and scalar + raise ValueError("cannot set a frame with no defined columns") - ax = self.obj.axes[i] - if not ax.is_unique: - return False + if isinstance(value, ABCSeries): + # append a Series + value = value.reindex(index=self.obj.columns, copy=True) + value.name = indexer - return True + else: + # a list-list + if is_list_like_indexer(value): + # must have conforming columns + if len(value) != len(self.obj.columns): + raise ValueError("cannot set a row with mismatched columns") - def _validate_integer(self, key: int, axis: int) -> None: - """ - Check that 'key' is a valid position in the desired axis. + value = Series(value, index=self.obj.columns, name=indexer) + self.obj._data = self.obj.append(value)._data + self.obj._maybe_update_cacher(clear=True) + + def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer: bool = False): + """ Parameters ---------- - key : int - Requested position. 
    def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer: bool = False):
        """
        Broadcast `ser` to the shape selected by `indexer` for assignment.

        Parameters
        ----------
        indexer : tuple, slice, scalar
            Indexer used to get the locations that will be set to `ser`.
        ser : pd.Series
            Values to assign to the locations specified by `indexer`.
        multiindex_indexer : boolean, optional
            Defaults to False. Should be set to True if `indexer` was from
            a `pd.MultiIndex`, to avoid unnecessary broadcasting.

        Returns
        -------
        `np.array` of `ser` broadcast to the appropriate shape for assignment
        to the locations selected by `indexer`

        Raises
        ------
        ValueError
            If `indexer` is of a kind that cannot be aligned with a Series.
        """
        # normalize 1-dimensional indexers to a 1-tuple so the tuple code
        # path below handles them uniformly
        if isinstance(indexer, (slice, np.ndarray, list, Index)):
            indexer = tuple([indexer])

        if isinstance(indexer, tuple):

            # flatten np.ndarray indexers
            def ravel(i):
                return i.ravel() if isinstance(i, np.ndarray) else i

            indexer = tuple(map(ravel, indexer))

            # an axis needs alignment exactly when its indexer is not a
            # full (null) slice
            aligners = [not com.is_null_slice(idx) for idx in indexer]
            sum_aligners = sum(aligners)
            single_aligner = sum_aligners == 1
            is_frame = self.ndim == 2
            obj = self.obj

            # are we a single alignable value on a non-primary
            # dim (e.g. panel: 1,2, or frame: 0) ?
            # hence need to align to a single axis dimension
            # rather than find all valid dims

            # frame
            if is_frame:
                single_aligner = single_aligner and aligners[0]

            # we have a frame, with multiple indexers on both axes; and a
            # series, so need to broadcast (see GH5206)
            if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
                ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

                # single indexer
                if len(indexer) > 1 and not multiindex_indexer:
                    # tile the row-aligned values across the selected columns
                    len_indexer = len(indexer[1])
                    ser = np.tile(ser, len_indexer).reshape(len_indexer, -1).T

                return ser

            for i, idx in enumerate(indexer):
                ax = obj.axes[i]

                # multiple aligners (or null slices)
                if is_sequence(idx) or isinstance(idx, slice):
                    if single_aligner and com.is_null_slice(idx):
                        continue
                    new_ix = ax[idx]
                    if not is_list_like_indexer(new_ix):
                        new_ix = Index([new_ix])
                    else:
                        new_ix = Index(new_ix)
                    if ser.index.equals(new_ix) or not len(new_ix):
                        # already aligned (or empty selection): no reindex
                        return ser._values.copy()

                    return ser.reindex(new_ix)._values

                # 2 dims
                elif single_aligner:

                    # reindex along index
                    ax = self.obj.axes[1]
                    if ser.index.equals(ax) or not len(ax):
                        return ser._values.copy()
                    return ser.reindex(ax)._values

        elif is_scalar(indexer):
            # scalar indexer: align the series along axis 1 (the columns)
            ax = self.obj._get_axis(1)

            if ser.index.equals(ax):
                return ser._values.copy()

            return ser.reindex(ax)._values

        raise ValueError("Incompatible indexer with Series")
kind="iloc") + if idx is not None and cols is not None: - def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): - """ - Much simpler as we only have to deal with our valid types. - """ - labels = self.obj._get_axis(axis) + if df.index.equals(idx) and df.columns.equals(cols): + val = df.copy()._values + else: + val = df.reindex(idx, columns=cols)._values + return val - # make need to convert a float key - if isinstance(key, slice): - labels._validate_positional_slice(key) - return key + elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame: + ax = self.obj.index[indexer] + if df.index.equals(ax): + val = df.copy()._values + else: - elif is_float(key): - labels._validate_indexer("positional", key, "iloc") - return key + # we have a multi-index and are trying to align + # with a particular, level GH3738 + if ( + isinstance(ax, ABCMultiIndex) + and isinstance(df.index, ABCMultiIndex) + and ax.nlevels != df.index.nlevels + ): + raise TypeError( + "cannot align on a multi-index with out " + "specifying the join levels" + ) - self._validate_key(key, axis) - return key + val = df.reindex(index=ax)._values + return val + + raise ValueError("Incompatible indexer with DataFrame") class _ScalarAccessIndexer(_NDFrameIndexerBase):