From 5506cc6b78cff164fccc7b19ec23ae205e28588f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 09:31:53 -0700 Subject: [PATCH 01/12] mark indexing methods --- pandas/core/frame.py | 15 +++++++++++++++ pandas/core/generic.py | 15 ++++++++++++--- pandas/core/indexing.py | 6 ------ pandas/core/series.py | 17 +++++++++++++++++ 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 55a9eb6a0810a..6035f5c4461aa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2751,6 +2751,7 @@ def transpose(self, *args, **kwargs): # ---------------------------------------------------------------------- # Picklability + # TODO: can we get rid of these? # legacy pickle formats def _unpickle_frame_compat(self, state): # pragma: no cover if len(state) == 2: # pragma: no cover @@ -2783,6 +2784,7 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover # ---------------------------------------------------------------------- # Getting and setting elements + # indexing method def get_value(self, index, col, takeable=False): """ Quickly retrieve single value at passed column and index. @@ -2810,6 +2812,7 @@ def get_value(self, index, col, takeable=False): ) return self._get_value(index, col, takeable=takeable) + # indexing method def _get_value(self, index, col, takeable=False): if takeable: @@ -2837,6 +2840,7 @@ def _get_value(self, index, col, takeable=False): _get_value.__doc__ = get_value.__doc__ + # indexing method def set_value(self, index, col, value, takeable=False): """ Put single value at passed column and index. 
@@ -2866,6 +2870,7 @@ def set_value(self, index, col, value, takeable=False): ) return self._set_value(index, col, value, takeable=takeable) + # indexing method def _set_value(self, index, col, value, takeable=False): try: if takeable is True: @@ -2889,6 +2894,7 @@ def _set_value(self, index, col, value, takeable=False): _set_value.__doc__ = set_value.__doc__ + # indexing method def _ixs(self, i, axis=0): """ Parameters @@ -2956,6 +2962,7 @@ def _ixs(self, i, axis=0): return result + # indexing method def __getitem__(self, key): key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) @@ -3011,6 +3018,7 @@ def __getitem__(self, key): return data + # indexing method def _getitem_bool_array(self, key): # also raises Exception if object array with NA values # warning here just in case -- previously __setitem__ was @@ -3034,6 +3042,7 @@ def _getitem_bool_array(self, key): indexer = key.nonzero()[0] return self._take(indexer, axis=0) + # indexing method def _getitem_multilevel(self, key): loc = self.columns.get_loc(key) if isinstance(loc, (slice, Series, np.ndarray, Index)): @@ -3071,6 +3080,7 @@ def _getitem_multilevel(self, key): else: return self._get_item_cache(key) + # indexing method def _getitem_frame(self, key): if key.values.size and not is_bool_dtype(key.values): raise ValueError("Must pass DataFrame with boolean values only") @@ -3465,6 +3475,7 @@ def _box_col_values(self, values, items): klass = self._constructor_sliced return klass(values, index=self.index, name=items, fastpath=True) + # indexing method def __setitem__(self, key, value): key = com.apply_if_callable(key, self) @@ -3481,10 +3492,12 @@ def __setitem__(self, key, value): # set column self._set_item(key, value) + # indexing method def _setitem_slice(self, key, value): self._check_setitem_copy() self.loc._setitem_with_indexer(key, value) + # indexing method def _setitem_array(self, key, value): # also raises Exception if object array with NA values if com.is_bool_indexer(key): @@ 
-3507,6 +3520,7 @@ def _setitem_array(self, key, value): self._check_setitem_copy() self.loc._setitem_with_indexer((slice(None), indexer), value) + # indexing method def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. # df[df > df2] = 0 @@ -3544,6 +3558,7 @@ def _ensure_valid_index(self, value): value.index.copy(), axis=1, fill_value=np.nan ) + # indexing method def _set_item(self, key, value): """ Add series to DataFrame in specified column. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4e05dfca43e78..24d750b9a9b71 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3265,6 +3265,7 @@ def _create_indexer(cls, name, indexer): _indexer = functools.partial(indexer, name) setattr(cls, name, property(_indexer, doc=indexer.__doc__)) + # indexing method def get(self, key, default=None): """ Get item from object for given key (ex: DataFrame column). @@ -3284,9 +3285,11 @@ def get(self, key, default=None): except (KeyError, ValueError, IndexError): return default + # indexing method def __getitem__(self, item): return self._get_item_cache(item) + # indexing method def _get_item_cache(self, item): """Return the cached item, item represents a label indexer.""" cache = self._item_cache @@ -3312,6 +3315,7 @@ def _reset_cacher(self): if hasattr(self, "_cacher"): del self._cacher + # indexing method def _iget_item_cache(self, item): """Return the cached item, item represents a positional indexer.""" ax = self._info_axis @@ -3386,6 +3390,7 @@ def _clear_item_cache(self, i=None): else: self._item_cache.clear() + # indexing method def _slice(self, slobj, axis=0, kind=None): """ Construct a slice of this container. 
@@ -3402,6 +3407,7 @@ def _slice(self, slobj, axis=0, kind=None): result._set_is_copy(self, copy=is_copy) return result + # indexing method def _set_item(self, key, value): self._data.set(key, value) self._clear_item_cache() @@ -3512,6 +3518,7 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): elif value == "warn": warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel) + # indexing method def __delitem__(self, key): """ Delete item @@ -3546,6 +3553,7 @@ def __delitem__(self, key): except KeyError: pass + # indexing method def _take(self, indices, axis=0, is_copy=True): """ Return the elements in the given *positional* indices along an axis. @@ -3592,6 +3600,7 @@ def _take(self, indices, axis=0, is_copy=True): return result + # indexing method def take(self, indices, axis=0, is_copy=True, **kwargs): """ Return the elements in the given *positional* indices along an axis. @@ -3670,6 +3679,7 @@ class max_speed nv.validate_take(tuple(), kwargs) return self._take(indices, axis=axis, is_copy=is_copy) + # indexing method def xs(self, key, axis=0, level=None, drop_level=True): """ Return cross-section from the Series/DataFrame. @@ -5288,9 +5298,6 @@ def _dir_additions(self): } return super()._dir_additions().union(additions) - # ---------------------------------------------------------------------- - # Getting and setting elements - # ---------------------------------------------------------------------- # Consolidation of internals @@ -9031,6 +9038,7 @@ def _align_series( return left.__finalize__(self), right.__finalize__(other) + # indexing method def _where( self, cond, @@ -10348,6 +10356,7 @@ def describe_1d(data): d.columns = data.columns.copy() return d + # TODO: doesn't need to be a method def _check_percentile(self, q): """ Validate percentiles (used by describe and quantile). 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c31d6538ad2c3..539c884ab05da 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -826,9 +826,6 @@ def _getitem_tuple(self, tup): # no shortcut needed retval = self.obj for i, key in enumerate(tup): - if i >= self.obj.ndim: - raise IndexingError("Too many indexers") - if com.is_null_slice(key): continue @@ -2114,9 +2111,6 @@ def _getitem_tuple(self, tup): retval = self.obj axis = 0 for i, key in enumerate(tup): - if i >= self.obj.ndim: - raise IndexingError("Too many indexers") - if com.is_null_slice(key): axis += 1 continue diff --git a/pandas/core/series.py b/pandas/core/series.py index 4b78907e66106..a76639bdddc19 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -991,6 +991,7 @@ def imag(self, v): # ---------------------------------------------------------------------- + # TODO: can we get rid of this? def _unpickle_series_compat(self, state): if isinstance(state, dict): self._data = state["_data"] @@ -1028,6 +1029,7 @@ def axes(self): """ return [self.index] + # indexing method def _ixs(self, i, axis=0): """ Return the i-th value or values in the Series by location. 
@@ -1065,10 +1067,12 @@ def _ixs(self, i, axis=0): def _is_mixed_type(self): return False + # indexing method def _slice(self, slobj, axis=0, kind=None): slobj = self.index._convert_slice_indexer(slobj, kind=kind or "getitem") return self._get_values(slobj) + # indexing method def __getitem__(self, key): key = com.apply_if_callable(key, self) try: @@ -1116,6 +1120,7 @@ def __getitem__(self, key): return self._get_with(key) + # indexing method def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): @@ -1167,6 +1172,7 @@ def _get_with(self, key): return self._get_values(key) raise + # indexing method def _get_values_tuple(self, key): # mpl hackaround if com._any_none(*key): @@ -1181,6 +1187,7 @@ def _get_values_tuple(self, key): self ) + # indexing method def _get_values(self, indexer): try: return self._constructor( @@ -1189,6 +1196,7 @@ def _get_values(self, indexer): except Exception: return self._values[indexer] + # indexing method def __setitem__(self, key, value): key = com.apply_if_callable(key, self) @@ -1247,6 +1255,7 @@ def setitem(key, value): if cacher_needs_updating: self._maybe_update_cacher() + # indexing method def _set_with_engine(self, key, value): values = self._values try: @@ -1256,6 +1265,7 @@ def _set_with_engine(self, key, value): values[self.index.get_loc(key)] = value return + # indexing method def _set_with(self, key, value): # other: fancy integer or otherwise if isinstance(key, slice): @@ -1291,6 +1301,7 @@ def _set_with(self, key, value): else: self._set_labels(key, value) + # indexing method def _set_labels(self, key, value): if isinstance(key, Index): key = key.values @@ -1302,6 +1313,7 @@ def _set_labels(self, key, value): raise ValueError("%s not contained in the index" % str(key[mask])) self._set_values(indexer, value) + # indexing method def _set_values(self, key, value): if isinstance(key, Series): key = key._values @@ -1365,6 +1377,7 @@ def repeat(self, repeats, axis=None): new_values = 
self._values.repeat(repeats) return self._constructor(new_values, index=new_index).__finalize__(self) + # indexing method def get_value(self, label, takeable=False): """ Quickly retrieve single value at passed index label. @@ -1390,6 +1403,7 @@ def get_value(self, label, takeable=False): ) return self._get_value(label, takeable=takeable) + # indexing method def _get_value(self, label, takeable=False): if takeable is True: return com.maybe_box_datetimelike(self._values[label]) @@ -1397,6 +1411,7 @@ def _get_value(self, label, takeable=False): _get_value.__doc__ = get_value.__doc__ + # indexing method def set_value(self, label, value, takeable=False): """ Quickly set single value at passed label. @@ -1430,6 +1445,7 @@ def set_value(self, label, value, takeable=False): ) return self._set_value(label, value, takeable=takeable) + # indexing method def _set_value(self, label, value, takeable=False): try: if takeable: @@ -4364,6 +4380,7 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v + # indexing method @Appender(generic.NDFrame._take.__doc__) def _take(self, indices, axis=0, is_copy=False): From 31f44f5bde82b38fce0cdf213e0047bfb3ee7b1e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 10:58:49 -0700 Subject: [PATCH 02/12] types --- pandas/core/indexing.py | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4650ded627d63..89f02df19e93f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -143,10 +143,7 @@ def __getitem__(self, key): key = com.apply_if_callable(key, self.obj) return self._getitem_axis(key, axis=axis) - def _get_label(self, label, axis=None): - if axis is None: - axis = self.axis or 0 - + def _get_label(self, label, axis: int): if self.ndim == 1: # for perf reasons we want to try _xs first # as its basically direct indexing @@ -161,9 +158,7 @@ def _get_label(self, 
label, axis=None): def _get_loc(self, key, axis: int): return self.obj._ixs(key, axis=axis) - def _slice(self, obj, axis=None, kind=None): - if axis is None: - axis = self.axis + def _slice(self, obj, axis: int, kind=None): return self.obj._slice(obj, axis=axis, kind=kind) def _get_setitem_indexer(self, key): @@ -879,10 +874,10 @@ def _convert_for_reindex(self, key, axis: int): def _handle_lowerdim_multi_index_axis0(self, tup): # we have an axis0 multi-index, handle or raise - + axis = self.axis or 0 try: # fast path for series or for tup devoid of slices - return self._get_label(tup, axis=self.axis) + return self._get_label(tup, axis=axis) except TypeError: # slices are unhashable pass @@ -980,7 +975,8 @@ def _getitem_nested_tuple(self, tup): # this is a series with a multi-index specified a tuple of # selectors - return self._getitem_axis(tup, axis=self.axis) + axis = self.axis or 0 + return self._getitem_axis(tup, axis=axis) # handle the multi-axis by taking sections and reducing # this is iterative @@ -1007,11 +1003,7 @@ def _getitem_nested_tuple(self, tup): return obj - def _getitem_axis(self, key, axis=None): - - if axis is None: - axis = self.axis or 0 - + def _getitem_axis(self, key, axis: int): if is_iterator(key): key = list(key) self._validate_key(key, axis) @@ -1436,7 +1428,7 @@ def _is_scalar_access(self, key): def _getitem_scalar(self, key): raise NotImplementedError() - def _getitem_axis(self, key, axis=None): + def _getitem_axis(self, key, axis: int): raise NotImplementedError() def _getbool_axis(self, key, axis: int): @@ -1783,10 +1775,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels): return key - def _getitem_axis(self, key, axis=None): - if axis is None: - axis = self.axis or 0 - + def _getitem_axis(self, key, axis: int): key = item_from_zerodim(key) if is_iterator(key): key = list(key) @@ -2137,10 +2126,7 @@ def _get_list_axis(self, key, axis: int): # re-raise with different error message raise IndexError("positional indexers 
are out-of-bounds") - def _getitem_axis(self, key, axis=None): - if axis is None: - axis = self.axis or 0 - + def _getitem_axis(self, key, axis: int): if isinstance(key, slice): return self._get_slice_axis(key, axis=axis) From 6148c028790616715074a90a9c0a0a66e8ff4165 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 11:58:48 -0700 Subject: [PATCH 03/12] change hasattr check --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c23e7a02b0ea0..6d8d84bf8af02 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3502,7 +3502,7 @@ def __delitem__(self, key): deleted = False maybe_shortcut = False - if hasattr(self, "columns") and isinstance(self.columns, MultiIndex): + if self.ndim == 2 and isinstance(self.columns, MultiIndex): try: maybe_shortcut = key not in self.columns._engine except TypeError: From 152d884b48a36cfb864481235f83d11510696ad1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 12:08:51 -0700 Subject: [PATCH 04/12] refactor out setitem_with_indexer_missing --- pandas/core/indexing.py | 119 ++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 89f02df19e93f..5fdfea5366243 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -401,65 +401,7 @@ def _setitem_with_indexer(self, indexer, value): indexer, missing = convert_missing_indexer(indexer) if missing: - - # reindex the axis to the new value - # and set inplace - if self.ndim == 1: - index = self.obj.index - new_index = index.insert(len(index), indexer) - - # we have a coerced indexer, e.g. a float - # that matches in an Int64Index, so - # we will not create a duplicate index, rather - # index to that element - # e.g. 
0.0 -> 0 - # GH12246 - if index.is_unique: - new_indexer = index.get_indexer([new_index[-1]]) - if (new_indexer != -1).any(): - return self._setitem_with_indexer(new_indexer, value) - - # this preserves dtype of the value - new_values = Series([value])._values - if len(self.obj._values): - # GH#22717 handle casting compatibility that np.concatenate - # does incorrectly - new_values = _concat_compat([self.obj._values, new_values]) - self.obj._data = self.obj._constructor( - new_values, index=new_index, name=self.obj.name - )._data - self.obj._maybe_update_cacher(clear=True) - return self.obj - - elif self.ndim == 2: - - # no columns and scalar - if not len(self.obj.columns): - raise ValueError( - "cannot set a frame with no defined " "columns" - ) - - # append a Series - if isinstance(value, Series): - - value = value.reindex(index=self.obj.columns, copy=True) - value.name = indexer - - # a list-list - else: - - # must have conforming columns - if is_list_like_indexer(value): - if len(value) != len(self.obj.columns): - raise ValueError( - "cannot set a row with " "mismatched columns" - ) - - value = Series(value, index=self.obj.columns, name=indexer) - - self.obj._data = self.obj.append(value)._data - self.obj._maybe_update_cacher(clear=True) - return self.obj + return self._setitem_with_indexer_missing(indexer, value) # set item_labels = self.obj._get_axis(info_axis) @@ -651,6 +593,65 @@ def setter(item, v): self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) + def _setitem_with_indexer_missing(self, indexer, value): + """ + Insert new row(s) or column(s) into the Series or DataFrame. + """ + from pandas import Series + + # reindex the axis to the new value + # and set inplace + if self.ndim == 1: + index = self.obj.index + new_index = index.insert(len(index), indexer) + + # we have a coerced indexer, e.g. 
a float + # that matches in an Int64Index, so + # we will not create a duplicate index, rather + # index to that element + # e.g. 0.0 -> 0 + # GH#12246 + if index.is_unique: + new_indexer = index.get_indexer([new_index[-1]]) + if (new_indexer != -1).any(): + return self._setitem_with_indexer(new_indexer, value) + + # this preserves dtype of the value + new_values = Series([value])._values + if len(self.obj._values): + # GH#22717 handle casting compatibility that np.concatenate + # does incorrectly + new_values = _concat_compat([self.obj._values, new_values]) + self.obj._data = self.obj._constructor( + new_values, index=new_index, name=self.obj.name + )._data + self.obj._maybe_update_cacher(clear=True) + return self.obj + + elif self.ndim == 2: + + if not len(self.obj.columns): + # no columns and scalar + raise ValueError("cannot set a frame with no defined columns") + + if isinstance(value, ABCSeries): + # append a Series + value = value.reindex(index=self.obj.columns, copy=True) + value.name = indexer + + else: + # a list-list + if is_list_like_indexer(value): + # must have conforming columns + if len(value) != len(self.obj.columns): + raise ValueError("cannot set a row with mismatched columns") + + value = Series(value, index=self.obj.columns, name=indexer) + + self.obj._data = self.obj.append(value)._data + self.obj._maybe_update_cacher(clear=True) + return self.obj + def _align_series(self, indexer, ser, multiindex_indexer=False): """ Parameters From 35a76c8dfe369df585b608c09ef564e0dff8dd93 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 13:49:50 -0700 Subject: [PATCH 05/12] remove no-longer-necessary Panel-compat code from GH#10360 --- pandas/core/indexing.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5fdfea5366243..3c1ec0eb6ac56 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -325,19 +325,6 @@ def _setitem_with_indexer(self, indexer, value): 
val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) - if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): - - for i, ax in zip(indexer, self.obj.axes): - - # if we have any multi-indexes that have non-trivial slices - # (not null slices) then we must take the split path, xref - # GH 10360 - if isinstance(ax, MultiIndex) and not ( - is_integer(i) or com.is_null_slice(i) - ): - take_split_path = True - break - if isinstance(indexer, tuple): nindexer = [] for i, idx in enumerate(indexer): From 44eb31780354add43451f0d729d619309c8e76a0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 15:11:27 -0700 Subject: [PATCH 06/12] cleanups because we know 2d --- pandas/core/indexing.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3c1ec0eb6ac56..93c5fc85ba9cd 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -395,6 +395,9 @@ def _setitem_with_indexer(self, indexer, value): # align and set the values if take_split_path: + # Above we only set take_split_path to True for 2D cases + assert self.ndim == 2 + assert info_axis == 1 if not isinstance(indexer, tuple): indexer = self._tuplify(indexer) @@ -448,11 +451,8 @@ def _setitem_with_indexer(self, indexer, value): # non-mi else: plane_indexer = indexer[:info_axis] + indexer[info_axis + 1 :] - if info_axis > 0: - plane_axis = self.obj.axes[:info_axis][0] - lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) - else: - lplane_indexer = 0 + plane_axis = self.obj.axes[:info_axis][0] + lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) def setter(item, v): s = self.obj[item] @@ -502,9 +502,7 @@ def setter(item, v): # hasattr first, to avoid coercing to ndarray without reason. # But we may be relying on the ndarray coercion to check ndim. # Why not just convert to an ndarray earlier on if needed? 
- elif (hasattr(value, "ndim") and value.ndim == 2) or ( - not hasattr(value, "ndim") and np.array(value).ndim - ) == 2: + elif np.ndim(value) == 2: # note that this coerces the dtype if we are mixed # GH 7551 From bf170ebc107ca05a1c611b48e76389826a85f555 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 16:31:51 -0700 Subject: [PATCH 07/12] all extant usages of _get_loc pass an int key --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 93c5fc85ba9cd..431b25ab2d107 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -155,7 +155,7 @@ def _get_label(self, label, axis: int): return self.obj._xs(label, axis=axis) - def _get_loc(self, key, axis: int): + def _get_loc(self, key: int, axis: int): return self.obj._ixs(key, axis=axis) def _slice(self, obj, axis: int, kind=None): From 4e6e1135f8e555df36f79eb977d20423c40a876e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 16:35:01 -0700 Subject: [PATCH 08/12] trim never-reached branches --- pandas/core/frame.py | 76 +++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 47 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6035f5c4461aa..2af9c72c54058 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2895,11 +2895,11 @@ def _set_value(self, index, col, value, takeable=False): _set_value.__doc__ = set_value.__doc__ # indexing method - def _ixs(self, i, axis=0): + def _ixs(self, i: int, axis: int = 0): """ Parameters ---------- - i : int, slice, or sequence of integers + i : int axis : int Notes @@ -2908,59 +2908,41 @@ def _ixs(self, i, axis=0): """ # irow if axis == 0: - if isinstance(i, slice): - return self[i] - else: - label = self.index[i] - if isinstance(label, Index): - # a location index by definition - result = self.take(i, axis=axis) - copy = True - else: - new_values = self._data.fast_xs(i) - if 
is_scalar(new_values): - return new_values - - # if we are a copy, mark as such - copy = ( - isinstance(new_values, np.ndarray) and new_values.base is None - ) - result = self._constructor_sliced( - new_values, - index=self.columns, - name=self.index[i], - dtype=new_values.dtype, - ) - result._set_is_copy(self, copy=copy) - return result + label = self.index[i] + new_values = self._data.fast_xs(i) + if is_scalar(new_values): + return new_values + + # if we are a copy, mark as such + copy = isinstance(new_values, np.ndarray) and new_values.base is None + result = self._constructor_sliced( + new_values, + index=self.columns, + name=self.index[i], + dtype=new_values.dtype, + ) + result._set_is_copy(self, copy=copy) + return result # icol else: label = self.columns[i] - if isinstance(i, slice): - # need to return view - lab_slice = slice(label[0], label[-1]) - return self.loc[:, lab_slice] - else: - if isinstance(label, Index): - return self._take(i, axis=1) + index_len = len(self.index) - index_len = len(self.index) + # if the values returned are not the same length + # as the index (iow a not found value), iget returns + # a 0-len ndarray. This is effectively catching + # a numpy error (as numpy should really raise) + values = self._data.iget(i) - # if the values returned are not the same length - # as the index (iow a not found value), iget returns - # a 0-len ndarray. 
This is effectively catching - # a numpy error (as numpy should really raise) - values = self._data.iget(i) + if index_len and not len(values): + values = np.array([np.nan] * index_len, dtype=object) + result = self._box_col_values(values, label) - if index_len and not len(values): - values = np.array([np.nan] * index_len, dtype=object) - result = self._box_col_values(values, label) + # this is a cached value, mark it so + result._set_as_cached(label, self) - # this is a cached value, mark it so - result._set_as_cached(label, self) - - return result + return result # indexing method def __getitem__(self, key): From df7cac46a90ed23da88a2441d4bdb040b5b26048 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 16:44:54 -0700 Subject: [PATCH 09/12] remove marks --- pandas/core/frame.py | 14 -------------- pandas/core/generic.py | 11 ----------- pandas/core/series.py | 16 ---------------- 3 files changed, 41 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2af9c72c54058..0b04362cbba2d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2784,7 +2784,6 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover # ---------------------------------------------------------------------- # Getting and setting elements - # indexing method def get_value(self, index, col, takeable=False): """ Quickly retrieve single value at passed column and index. @@ -2812,7 +2811,6 @@ def get_value(self, index, col, takeable=False): ) return self._get_value(index, col, takeable=takeable) - # indexing method def _get_value(self, index, col, takeable=False): if takeable: @@ -2840,7 +2838,6 @@ def _get_value(self, index, col, takeable=False): _get_value.__doc__ = get_value.__doc__ - # indexing method def set_value(self, index, col, value, takeable=False): """ Put single value at passed column and index. 
@@ -2870,7 +2867,6 @@ def set_value(self, index, col, value, takeable=False): ) return self._set_value(index, col, value, takeable=takeable) - # indexing method def _set_value(self, index, col, value, takeable=False): try: if takeable is True: @@ -2894,7 +2890,6 @@ def _set_value(self, index, col, value, takeable=False): _set_value.__doc__ = set_value.__doc__ - # indexing method def _ixs(self, i: int, axis: int = 0): """ Parameters @@ -2944,7 +2939,6 @@ def _ixs(self, i: int, axis: int = 0): return result - # indexing method def __getitem__(self, key): key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) @@ -3000,7 +2994,6 @@ def __getitem__(self, key): return data - # indexing method def _getitem_bool_array(self, key): # also raises Exception if object array with NA values # warning here just in case -- previously __setitem__ was @@ -3024,7 +3017,6 @@ def _getitem_bool_array(self, key): indexer = key.nonzero()[0] return self._take(indexer, axis=0) - # indexing method def _getitem_multilevel(self, key): loc = self.columns.get_loc(key) if isinstance(loc, (slice, Series, np.ndarray, Index)): @@ -3062,7 +3054,6 @@ def _getitem_multilevel(self, key): else: return self._get_item_cache(key) - # indexing method def _getitem_frame(self, key): if key.values.size and not is_bool_dtype(key.values): raise ValueError("Must pass DataFrame with boolean values only") @@ -3457,7 +3448,6 @@ def _box_col_values(self, values, items): klass = self._constructor_sliced return klass(values, index=self.index, name=items, fastpath=True) - # indexing method def __setitem__(self, key, value): key = com.apply_if_callable(key, self) @@ -3474,12 +3464,10 @@ def __setitem__(self, key, value): # set column self._set_item(key, value) - # indexing method def _setitem_slice(self, key, value): self._check_setitem_copy() self.loc._setitem_with_indexer(key, value) - # indexing method def _setitem_array(self, key, value): # also raises Exception if object array with NA values if 
com.is_bool_indexer(key): @@ -3502,7 +3490,6 @@ def _setitem_array(self, key, value): self._check_setitem_copy() self.loc._setitem_with_indexer((slice(None), indexer), value) - # indexing method def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. # df[df > df2] = 0 @@ -3540,7 +3527,6 @@ def _ensure_valid_index(self, value): value.index.copy(), axis=1, fill_value=np.nan ) - # indexing method def _set_item(self, key, value): """ Add series to DataFrame in specified column. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6d8d84bf8af02..ae42adbf10f5b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3241,7 +3241,6 @@ def _create_indexer(cls, name, indexer): _indexer = functools.partial(indexer, name) setattr(cls, name, property(_indexer, doc=indexer.__doc__)) - # indexing method def get(self, key, default=None): """ Get item from object for given key (ex: DataFrame column). @@ -3261,11 +3260,9 @@ def get(self, key, default=None): except (KeyError, ValueError, IndexError): return default - # indexing method def __getitem__(self, item): return self._get_item_cache(item) - # indexing method def _get_item_cache(self, item): """Return the cached item, item represents a label indexer.""" cache = self._item_cache @@ -3291,7 +3288,6 @@ def _reset_cacher(self): if hasattr(self, "_cacher"): del self._cacher - # indexing method def _iget_item_cache(self, item): """Return the cached item, item represents a positional indexer.""" ax = self._info_axis @@ -3366,7 +3362,6 @@ def _clear_item_cache(self, i=None): else: self._item_cache.clear() - # indexing method def _slice(self, slobj, axis=0, kind=None): """ Construct a slice of this container. 
@@ -3383,7 +3378,6 @@ def _slice(self, slobj, axis=0, kind=None): result._set_is_copy(self, copy=is_copy) return result - # indexing method def _set_item(self, key, value): self._data.set(key, value) self._clear_item_cache() @@ -3494,7 +3488,6 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): elif value == "warn": warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel) - # indexing method def __delitem__(self, key): """ Delete item @@ -3529,7 +3522,6 @@ def __delitem__(self, key): except KeyError: pass - # indexing method def _take(self, indices, axis=0, is_copy=True): """ Return the elements in the given *positional* indices along an axis. @@ -3576,7 +3568,6 @@ def _take(self, indices, axis=0, is_copy=True): return result - # indexing method def take(self, indices, axis=0, is_copy=True, **kwargs): """ Return the elements in the given *positional* indices along an axis. @@ -3655,7 +3646,6 @@ class max_speed nv.validate_take(tuple(), kwargs) return self._take(indices, axis=axis, is_copy=is_copy) - # indexing method def xs(self, key, axis=0, level=None, drop_level=True): """ Return cross-section from the Series/DataFrame. @@ -9014,7 +9004,6 @@ def _align_series( return left.__finalize__(self), right.__finalize__(other) - # indexing method def _where( self, cond, diff --git a/pandas/core/series.py b/pandas/core/series.py index 4b50e995f5a20..7cb287b0d8375 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1029,7 +1029,6 @@ def axes(self): """ return [self.index] - # indexing method def _ixs(self, i, axis=0): """ Return the i-th value or values in the Series by location. 
@@ -1067,12 +1066,10 @@ def _ixs(self, i, axis=0): def _is_mixed_type(self): return False - # indexing method def _slice(self, slobj, axis=0, kind=None): slobj = self.index._convert_slice_indexer(slobj, kind=kind or "getitem") return self._get_values(slobj) - # indexing method def __getitem__(self, key): key = com.apply_if_callable(key, self) try: @@ -1120,7 +1117,6 @@ def __getitem__(self, key): return self._get_with(key) - # indexing method def _get_with(self, key): # other: fancy integer or otherwise if isinstance(key, slice): @@ -1172,7 +1168,6 @@ def _get_with(self, key): return self._get_values(key) raise - # indexing method def _get_values_tuple(self, key): # mpl hackaround if com._any_none(*key): @@ -1187,7 +1182,6 @@ def _get_values_tuple(self, key): self ) - # indexing method def _get_values(self, indexer): try: return self._constructor( @@ -1196,7 +1190,6 @@ def _get_values(self, indexer): except Exception: return self._values[indexer] - # indexing method def __setitem__(self, key, value): key = com.apply_if_callable(key, self) @@ -1255,7 +1248,6 @@ def setitem(key, value): if cacher_needs_updating: self._maybe_update_cacher() - # indexing method def _set_with_engine(self, key, value): values = self._values try: @@ -1265,7 +1257,6 @@ def _set_with_engine(self, key, value): values[self.index.get_loc(key)] = value return - # indexing method def _set_with(self, key, value): # other: fancy integer or otherwise if isinstance(key, slice): @@ -1308,7 +1299,6 @@ def _set_with(self, key, value): else: self._set_labels(key, value) - # indexing method def _set_labels(self, key, value): if isinstance(key, Index): key = key.values @@ -1320,7 +1310,6 @@ def _set_labels(self, key, value): raise ValueError("%s not contained in the index" % str(key[mask])) self._set_values(indexer, value) - # indexing method def _set_values(self, key, value): if isinstance(key, Series): key = key._values @@ -1384,7 +1373,6 @@ def repeat(self, repeats, axis=None): new_values = 
self._values.repeat(repeats) return self._constructor(new_values, index=new_index).__finalize__(self) - # indexing method def get_value(self, label, takeable=False): """ Quickly retrieve single value at passed index label. @@ -1410,7 +1398,6 @@ def get_value(self, label, takeable=False): ) return self._get_value(label, takeable=takeable) - # indexing method def _get_value(self, label, takeable=False): if takeable is True: return com.maybe_box_datetimelike(self._values[label]) @@ -1418,7 +1405,6 @@ def _get_value(self, label, takeable=False): _get_value.__doc__ = get_value.__doc__ - # indexing method def set_value(self, label, value, takeable=False): """ Quickly set single value at passed label. @@ -1452,7 +1438,6 @@ def set_value(self, label, value, takeable=False): ) return self._set_value(label, value, takeable=takeable) - # indexing method def _set_value(self, label, value, takeable=False): try: if takeable: @@ -4387,7 +4372,6 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - # indexing method @Appender(generic.NDFrame._take.__doc__) def _take(self, indices, axis=0, is_copy=False): From 5fb03e127bce9a3a8a83ac61c0b210c67b58e74a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 16:47:19 -0700 Subject: [PATCH 10/12] simplify ixs --- pandas/core/series.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7cb287b0d8375..ad5aaa5e96a71 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1029,38 +1029,25 @@ def axes(self): """ return [self.index] - def _ixs(self, i, axis=0): + def _ixs(self, i: int, axis: int = 0): """ Return the i-th value or values in the Series by location. 
Parameters ---------- - i : int, slice, or sequence of integers + i : int Returns ------- scalar (int) or Series (slice, sequence) """ - try: - # dispatch to the values if we need - values = self._values - if isinstance(values, np.ndarray): - return libindex.get_value_at(values, i) - else: - return values[i] - except IndexError: - raise - except Exception: - if isinstance(i, slice): - indexer = self.index._convert_slice_indexer(i, kind="iloc") - return self._get_values(indexer) - else: - label = self.index[i] - if isinstance(label, Index): - return self.take(i, axis=axis, convert=True) - else: - return libindex.get_value_at(self, i) + # dispatch to the values if we need + values = self._values + if isinstance(values, np.ndarray): + return libindex.get_value_at(values, i) + else: + return values[i] @property def _is_mixed_type(self): From 75e0bc11b880087c3b082100c390334a7b634ea0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 16:48:41 -0700 Subject: [PATCH 11/12] remove comments --- pandas/core/frame.py | 1 - pandas/core/generic.py | 1 - pandas/core/series.py | 1 - 3 files changed, 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0b04362cbba2d..168cd6a8ebb67 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2751,7 +2751,6 @@ def transpose(self, *args, **kwargs): # ---------------------------------------------------------------------- # Picklability - # TODO: can we get rid of these? # legacy pickle formats def _unpickle_frame_compat(self, state): # pragma: no cover if len(state) == 2: # pragma: no cover diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ae42adbf10f5b..ceef7dd2a5a7d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10321,7 +10321,6 @@ def describe_1d(data): d.columns = data.columns.copy() return d - # TODO: doesn't need to be a method def _check_percentile(self, q): """ Validate percentiles (used by describe and quantile). 
diff --git a/pandas/core/series.py b/pandas/core/series.py index ad5aaa5e96a71..fa9f79d7d2683 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -991,7 +991,6 @@ def imag(self, v): # ---------------------------------------------------------------------- - # TODO: can we get rid of this? def _unpickle_series_compat(self, state): if isinstance(state, dict): self._data = state["_data"] From 437f8fed5a9754b26a5099f04d631c8ef39a0995 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 12 Jul 2019 07:55:22 -0700 Subject: [PATCH 12/12] dont use index_len --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7f4ecde7557fc..53cb0cedc208b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2921,7 +2921,6 @@ def _ixs(self, i: int, axis: int = 0): # icol else: label = self.columns[i] - index_len = len(self.index) # if the values returned are not the same length # as the index (iow a not found value), iget returns @@ -2929,8 +2928,8 @@ def _ixs(self, i: int, axis: int = 0): # a numpy error (as numpy should really raise) values = self._data.iget(i) - if index_len and not len(values): - values = np.array([np.nan] * index_len, dtype=object) + if len(self.index) and not len(values): + values = np.array([np.nan] * len(self.index), dtype=object) result = self._box_col_values(values, label) # this is a cached value, mark it so