diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a84f1ddd87a5..301aa08236ff5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2777,81 +2777,7 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover self._data = dm._data # ---------------------------------------------------------------------- - # Getting and setting elements - - def _get_value(self, index, col, takeable: bool = False): - """ - Quickly retrieve single value at passed column and index. - - Parameters - ---------- - index : row label - col : column label - takeable : interpret the index/col as indexers, default False - - Returns - ------- - scalar - """ - if takeable: - series = self._iget_item_cache(col) - return com.maybe_box_datetimelike(series._values[index]) - - series = self._get_item_cache(col) - engine = self.index._engine - - try: - return engine.get_value(series._values, index) - except KeyError: - # GH 20629 - if self.index.nlevels > 1: - # partial indexing forbidden - raise - except (TypeError, ValueError): - pass - - # we cannot handle direct indexing - # use positional - col = self.columns.get_loc(col) - index = self.index.get_loc(index) - return self._get_value(index, col, takeable=True) - - def _set_value(self, index, col, value, takeable: bool = False): - """ - Put single value at passed column and index. - - Parameters - ---------- - index : row label - col : column label - value : scalar - takeable : interpret the index/col as indexers, default False - - Returns - ------- - DataFrame - If label pair is contained, will be reference to calling DataFrame, - otherwise a new object. - """ - try: - if takeable is True: - series = self._iget_item_cache(col) - return series._set_value(index, value, takeable=True) - - series = self._get_item_cache(col) - engine = self.index._engine - engine.set_value(series._values, index, value) - return self - except (KeyError, TypeError): - - # set using a non-recursive method & reset the cache - if takeable: - self.iloc[index, col] = value - else: - self.loc[index, col] = value - self._item_cache.pop(col, None) - - return self + # Indexing Methods def _ixs(self, i: int, axis: int = 0): """ @@ -3021,6 +2947,199 @@ def _getitem_frame(self, key): raise ValueError("Must pass DataFrame with boolean values only") return self.where(key) + def _get_value(self, index, col, takeable: bool = False): + """ + Quickly retrieve single value at passed column and index. + + Parameters + ---------- + index : row label + col : column label + takeable : interpret the index/col as indexers, default False + + Returns + ------- + scalar + """ + if takeable: + series = self._iget_item_cache(col) + return com.maybe_box_datetimelike(series._values[index]) + + series = self._get_item_cache(col) + engine = self.index._engine + + try: + return engine.get_value(series._values, index) + except KeyError: + # GH 20629 + if self.index.nlevels > 1: + # partial indexing forbidden + raise + except (TypeError, ValueError): + pass + + # we cannot handle direct indexing + # use positional + col = self.columns.get_loc(col) + index = self.index.get_loc(index) + return self._get_value(index, col, takeable=True) + + def __setitem__(self, key, value): + key = com.apply_if_callable(key, self) + + # see if we can slice the rows + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + return self._setitem_slice(indexer, value) + + if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: + self._setitem_frame(key, value) + elif isinstance(key, (Series, np.ndarray, list, Index)): + self._setitem_array(key, value) + else: + # set column + self._set_item(key, value) + + def _setitem_slice(self, key, value): + self._check_setitem_copy() + self.loc[key] = value + + def _setitem_array(self, key, value): + # also raises Exception if object array with NA values + if com.is_bool_indexer(key): + if len(key) != len(self.index): + raise ValueError( + "Item wrong length %d instead of %d!" % (len(key), len(self.index)) + ) + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + self._check_setitem_copy() + self.loc._setitem_with_indexer(indexer, value) + else: + if isinstance(value, DataFrame): + if len(value.columns) != len(key): + raise ValueError("Columns must be same length as key") + for k1, k2 in zip(key, value.columns): + self[k1] = value[k2] + else: + indexer = self.loc._get_listlike_indexer( + key, axis=1, raise_missing=False + )[1] + self._check_setitem_copy() + self.loc._setitem_with_indexer((slice(None), indexer), value) + + def _setitem_frame(self, key, value): + # support boolean setting with DataFrame input, e.g. + # df[df > df2] = 0 + if isinstance(key, np.ndarray): + if key.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + key = self._constructor(key, **self._construct_axes_dict()) + + if key.values.size and not is_bool_dtype(key.values): + raise TypeError( + "Must pass DataFrame or 2-d ndarray with boolean values only" + ) + + self._check_inplace_setting(value) + self._check_setitem_copy() + self._where(-key, value, inplace=True) + + def _set_item(self, key, value): + """ + Add series to DataFrame in specified column. + + If series is a numpy-array (not a Series/TimeSeries), it must be the + same length as the DataFrames index or an error will be thrown. + + Series/TimeSeries will be conformed to the DataFrames index to + ensure homogeneity. + """ + + self._ensure_valid_index(value) + value = self._sanitize_column(key, value) + NDFrame._set_item(self, key, value) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + + def _set_value(self, index, col, value, takeable: bool = False): + """ + Put single value at passed column and index. + + Parameters + ---------- + index : row label + col : column label + value : scalar + takeable : interpret the index/col as indexers, default False + + Returns + ------- + DataFrame + If label pair is contained, will be reference to calling DataFrame, + otherwise a new object. + """ + try: + if takeable is True: + series = self._iget_item_cache(col) + return series._set_value(index, value, takeable=True) + + series = self._get_item_cache(col) + engine = self.index._engine + engine.set_value(series._values, index, value) + return self + except (KeyError, TypeError): + + # set using a non-recursive method & reset the cache + if takeable: + self.iloc[index, col] = value + else: + self.loc[index, col] = value + self._item_cache.pop(col, None) + + return self + + def _ensure_valid_index(self, value): + """ + Ensure that if we don't have an index, that we can create one from the + passed value. + """ + # GH5632, make sure that we are a Series convertible + if not len(self.index) and is_list_like(value): + try: + value = Series(value) + except (ValueError, NotImplementedError, TypeError): + raise ValueError( + "Cannot set a frame with no defined index " + "and a value that cannot be converted to a " + "Series" + ) + + self._data = self._data.reindex_axis( + value.index.copy(), axis=1, fill_value=np.nan + ) + + def _box_item_values(self, key, values): + items = self.columns[self.columns.get_loc(key)] + if values.ndim == 2: + return self._constructor(values.T, columns=items, index=self.index) + else: + return self._box_col_values(values, items) + + def _box_col_values(self, values, items): + """ + Provide boxed values for a column. + """ + klass = self._constructor_sliced + return klass(values, index=self.index, name=items, fastpath=True) + + # ---------------------------------------------------------------------- + # Unsorted + def query(self, expr, inplace=False, **kwargs): """ Query the columns of a DataFrame with a boolean expression. @@ -3392,122 +3511,6 @@ def is_dtype_instance_mapper(idx, dtype): dtype_indexer = include_these & exclude_these return self.loc[_get_info_slice(self, dtype_indexer)] - def _box_item_values(self, key, values): - items = self.columns[self.columns.get_loc(key)] - if values.ndim == 2: - return self._constructor(values.T, columns=items, index=self.index) - else: - return self._box_col_values(values, items) - - def _box_col_values(self, values, items): - """ - Provide boxed values for a column. - """ - klass = self._constructor_sliced - return klass(values, index=self.index, name=items, fastpath=True) - - def __setitem__(self, key, value): - key = com.apply_if_callable(key, self) - - # see if we can slice the rows - indexer = convert_to_index_sliceable(self, key) - if indexer is not None: - return self._setitem_slice(indexer, value) - - if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: - self._setitem_frame(key, value) - elif isinstance(key, (Series, np.ndarray, list, Index)): - self._setitem_array(key, value) - else: - # set column - self._set_item(key, value) - - def _setitem_slice(self, key, value): - self._check_setitem_copy() - self.loc[key] = value - - def _setitem_array(self, key, value): - # also raises Exception if object array with NA values - if com.is_bool_indexer(key): - if len(key) != len(self.index): - raise ValueError( - "Item wrong length %d instead of %d!" % (len(key), len(self.index)) - ) - key = check_bool_indexer(self.index, key) - indexer = key.nonzero()[0] - self._check_setitem_copy() - self.loc._setitem_with_indexer(indexer, value) - else: - if isinstance(value, DataFrame): - if len(value.columns) != len(key): - raise ValueError("Columns must be same length as key") - for k1, k2 in zip(key, value.columns): - self[k1] = value[k2] - else: - indexer = self.loc._get_listlike_indexer( - key, axis=1, raise_missing=False - )[1] - self._check_setitem_copy() - self.loc._setitem_with_indexer((slice(None), indexer), value) - - def _setitem_frame(self, key, value): - # support boolean setting with DataFrame input, e.g. - # df[df > df2] = 0 - if isinstance(key, np.ndarray): - if key.shape != self.shape: - raise ValueError("Array conditional must be same shape as self") - key = self._constructor(key, **self._construct_axes_dict()) - - if key.values.size and not is_bool_dtype(key.values): - raise TypeError( - "Must pass DataFrame or 2-d ndarray with boolean values only" - ) - - self._check_inplace_setting(value) - self._check_setitem_copy() - self._where(-key, value, inplace=True) - - def _ensure_valid_index(self, value): - """ - Ensure that if we don't have an index, that we can create one from the - passed value. - """ - # GH5632, make sure that we are a Series convertible - if not len(self.index) and is_list_like(value): - try: - value = Series(value) - except (ValueError, NotImplementedError, TypeError): - raise ValueError( - "Cannot set a frame with no defined index " - "and a value that cannot be converted to a " - "Series" - ) - - self._data = self._data.reindex_axis( - value.index.copy(), axis=1, fill_value=np.nan - ) - - def _set_item(self, key, value): - """ - Add series to DataFrame in specified column. - - If series is a numpy-array (not a Series/TimeSeries), it must be the - same length as the DataFrames index or an error will be thrown. - - Series/TimeSeries will be conformed to the DataFrames index to - ensure homogeneity. - """ - - self._ensure_valid_index(value) - value = self._sanitize_column(key, value) - NDFrame._set_item(self, key, value) - - # check if we are modifying a copy - # try to set first as we want an invalid - # value exception to occur first - if len(self): - self._check_setitem_copy() - def insert(self, loc, column, value, allow_duplicates=False): """ Insert column into DataFrame at specified location. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9053edf2d1424..db54c3006a6a2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3231,41 +3231,8 @@ def _create_indexer(cls, name, indexer): _indexer = functools.partial(indexer, name) setattr(cls, name, property(_indexer, doc=indexer.__doc__)) - def get(self, key, default=None): - """ - Get item from object for given key (ex: DataFrame column). - - Returns default value if not found. - - Parameters - ---------- - key : object - - Returns - ------- - value : same type as items contained in object - """ - try: - return self[key] - except (KeyError, ValueError, IndexError): - return default - - def __getitem__(self, item): - return self._get_item_cache(item) - - def _get_item_cache(self, item): - """Return the cached item, item represents a label indexer.""" - cache = self._item_cache - res = cache.get(item) - if res is None: - values = self._data.get(item) - res = self._box_item_values(item, values) - cache[item] = res - res._set_as_cached(item, self) - - # for a chain - res._is_copy = self._is_copy - return res + # ---------------------------------------------------------------------- + # Lookup Caching def _set_as_cached(self, item, cacher): """Set the _cacher attribute on the calling object with a weakref to @@ -3278,18 +3245,6 @@ def _reset_cacher(self): if hasattr(self, "_cacher"): del self._cacher - def _iget_item_cache(self, item): - """Return the cached item, item represents a positional indexer.""" - ax = self._info_axis - if ax.is_unique: - lower = self._get_item_cache(ax[item]) - else: - lower = self.take(item, axis=self._info_axis_number) - return lower - - def _box_item_values(self, key, values): - raise AbstractMethodError(self) - def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. """ @@ -3307,11 +3262,6 @@ def _get_cacher(self): cacher = cacher[1]() return cacher - @property - def _is_view(self): - """Return boolean indicating if self is view of another array """ - return self._data.is_view - def _maybe_update_cacher(self, clear=False, verify_is_copy=True): """ See if we need to update our parent cacher if clear, then clear our @@ -3352,165 +3302,8 @@ def _clear_item_cache(self, i=None): else: self._item_cache.clear() - def _slice(self, slobj, axis=0, kind=None): - """ - Construct a slice of this container. - - kind parameter is maintained for compatibility with Series slicing. - """ - axis = self._get_block_manager_axis(axis) - result = self._constructor(self._data.get_slice(slobj, axis=axis)) - result = result.__finalize__(self) - - # this could be a view - # but only in a single-dtyped view sliceable case - is_copy = axis != 0 or result._is_view - result._set_is_copy(self, copy=is_copy) - return result - - def _set_item(self, key, value): - self._data.set(key, value) - self._clear_item_cache() - - def _set_is_copy(self, ref=None, copy=True): - if not copy: - self._is_copy = None - else: - if ref is not None: - self._is_copy = weakref.ref(ref) - else: - self._is_copy = None - - def _check_is_chained_assignment_possible(self): - """ - Check if we are a view, have a cacher, and are of mixed type. - If so, then force a setitem_copy check. - - Should be called just near setting a value - - Will return a boolean if it we are a view and are cached, but a - single-dtype meaning that the cacher should be updated following - setting. - """ - if self._is_view and self._is_cached: - ref = self._get_cacher() - if ref is not None and ref._is_mixed_type: - self._check_setitem_copy(stacklevel=4, t="referant", force=True) - return True - elif self._is_copy: - self._check_setitem_copy(stacklevel=4, t="referant") - return False - - def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): - """ - - Parameters - ---------- - stacklevel : integer, default 4 - the level to show of the stack when the error is output - t : string, the type of setting error - force : boolean, default False - if True, then force showing an error - - validate if we are doing a setitem on a chained copy. - - If you call this function, be sure to set the stacklevel such that the - user will see the error *at the level of setting* - - It is technically possible to figure out that we are setting on - a copy even WITH a multi-dtyped pandas object. In other words, some - blocks may be views while other are not. Currently _is_view will ALWAYS - return False for multi-blocks to avoid having to handle this case. - - df = DataFrame(np.arange(0,9), columns=['count']) - df['group'] = 'b' - - # This technically need not raise SettingWithCopy if both are view - # (which is not # generally guaranteed but is usually True. However, - # this is in general not a good practice and we recommend using .loc. - df.iloc[0:5]['group'] = 'a' - - """ - - # return early if the check is not needed - if not (force or self._is_copy): - return - - value = config.get_option("mode.chained_assignment") - if value is None: - return - - # see if the copy is not actually referred; if so, then dissolve - # the copy weakref - if self._is_copy is not None and not isinstance(self._is_copy, str): - r = self._is_copy() - if not gc.get_referents(r) or r.shape == self.shape: - self._is_copy = None - return - - # a custom message - if isinstance(self._is_copy, str): - t = self._is_copy - - elif t == "referant": - t = ( - "\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame\n\n" - "See the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) - - else: - t = ( - "\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame.\n" - "Try using .loc[row_indexer,col_indexer] = value " - "instead\n\nSee the caveats in the documentation: " - "http://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) - - if value == "raise": - raise com.SettingWithCopyError(t) - elif value == "warn": - warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel) - - def __delitem__(self, key): - """ - Delete item - """ - deleted = False - - maybe_shortcut = False - if self.ndim == 2 and isinstance(self.columns, MultiIndex): - try: - maybe_shortcut = key not in self.columns._engine - except TypeError: - pass - - if maybe_shortcut: - # Allow shorthand to delete all columns whose first len(key) - # elements match key: - if not isinstance(key, tuple): - key = (key,) - for col in self.columns: - if isinstance(col, tuple) and col[: len(key)] == key: - del self[col] - deleted = True - if not deleted: - # If the above loop ran and didn't delete anything because - # there was no match, this call should raise the appropriate - # exception: - self._data.delete(key) - - # delete from the caches - try: - del self._item_cache[key] - except KeyError: - pass + # ---------------------------------------------------------------------- + # Indexing Methods def take(self, indices, axis=0, is_copy=True, **kwargs): """ @@ -3766,6 +3559,222 @@ class animal locomotion _xs = xs # type: Callable + def get(self, key, default=None): + """ + Get item from object for given key (ex: DataFrame column). + + Returns default value if not found. + + Parameters + ---------- + key : object + + Returns + ------- + value : same type as items contained in object + """ + try: + return self[key] + except (KeyError, ValueError, IndexError): + return default + + def __getitem__(self, item): + return self._get_item_cache(item) + + def _get_item_cache(self, item): + """Return the cached item, item represents a label indexer.""" + cache = self._item_cache + res = cache.get(item) + if res is None: + values = self._data.get(item) + res = self._box_item_values(item, values) + cache[item] = res + res._set_as_cached(item, self) + + # for a chain + res._is_copy = self._is_copy + return res + + def _iget_item_cache(self, item): + """Return the cached item, item represents a positional indexer.""" + ax = self._info_axis + if ax.is_unique: + lower = self._get_item_cache(ax[item]) + else: + lower = self.take(item, axis=self._info_axis_number) + return lower + + def _box_item_values(self, key, values): + raise AbstractMethodError(self) + + def _slice(self, slobj, axis=0, kind=None): + """ + Construct a slice of this container. + + kind parameter is maintained for compatibility with Series slicing. + """ + axis = self._get_block_manager_axis(axis) + result = self._constructor(self._data.get_slice(slobj, axis=axis)) + result = result.__finalize__(self) + + # this could be a view + # but only in a single-dtyped view sliceable case + is_copy = axis != 0 or result._is_view + result._set_is_copy(self, copy=is_copy) + return result + + def _set_item(self, key, value): + self._data.set(key, value) + self._clear_item_cache() + + def _set_is_copy(self, ref=None, copy=True): + if not copy: + self._is_copy = None + else: + if ref is not None: + self._is_copy = weakref.ref(ref) + else: + self._is_copy = None + + def _check_is_chained_assignment_possible(self): + """ + Check if we are a view, have a cacher, and are of mixed type. + If so, then force a setitem_copy check. + + Should be called just near setting a value + + Will return a boolean if it we are a view and are cached, but a + single-dtype meaning that the cacher should be updated following + setting. + """ + if self._is_view and self._is_cached: + ref = self._get_cacher() + if ref is not None and ref._is_mixed_type: + self._check_setitem_copy(stacklevel=4, t="referant", force=True) + return True + elif self._is_copy: + self._check_setitem_copy(stacklevel=4, t="referant") + return False + + def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): + """ + + Parameters + ---------- + stacklevel : integer, default 4 + the level to show of the stack when the error is output + t : string, the type of setting error + force : boolean, default False + if True, then force showing an error + + validate if we are doing a setitem on a chained copy. + + If you call this function, be sure to set the stacklevel such that the + user will see the error *at the level of setting* + + It is technically possible to figure out that we are setting on + a copy even WITH a multi-dtyped pandas object. In other words, some + blocks may be views while other are not. Currently _is_view will ALWAYS + return False for multi-blocks to avoid having to handle this case. + + df = DataFrame(np.arange(0,9), columns=['count']) + df['group'] = 'b' + + # This technically need not raise SettingWithCopy if both are view + # (which is not # generally guaranteed but is usually True. However, + # this is in general not a good practice and we recommend using .loc. + df.iloc[0:5]['group'] = 'a' + + """ + + # return early if the check is not needed + if not (force or self._is_copy): + return + + value = config.get_option("mode.chained_assignment") + if value is None: + return + + # see if the copy is not actually referred; if so, then dissolve + # the copy weakref + if self._is_copy is not None and not isinstance(self._is_copy, str): + r = self._is_copy() + if not gc.get_referents(r) or r.shape == self.shape: + self._is_copy = None + return + + # a custom message + if isinstance(self._is_copy, str): + t = self._is_copy + + elif t == "referant": + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame\n\n" + "See the caveats in the documentation: " + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + else: + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame.\n" + "Try using .loc[row_indexer,col_indexer] = value " + "instead\n\nSee the caveats in the documentation: " + "http://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + if value == "raise": + raise com.SettingWithCopyError(t) + elif value == "warn": + warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel) + + def __delitem__(self, key): + """ + Delete item + """ + deleted = False + + maybe_shortcut = False + if self.ndim == 2 and isinstance(self.columns, MultiIndex): + try: + maybe_shortcut = key not in self.columns._engine + except TypeError: + pass + + if maybe_shortcut: + # Allow shorthand to delete all columns whose first len(key) + # elements match key: + if not isinstance(key, tuple): + key = (key,) + for col in self.columns: + if isinstance(col, tuple) and col[: len(key)] == key: + del self[col] + deleted = True + if not deleted: + # If the above loop ran and didn't delete anything because + # there was no match, this call should raise the appropriate + # exception: + self._data.delete(key) + + # delete from the caches + try: + del self._item_cache[key] + except KeyError: + pass + + # ---------------------------------------------------------------------- + # Unsorted + + @property + def _is_view(self): + """Return boolean indicating if self is view of another array """ + return self._data.is_view + def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): """ Return an object with matching indices as other object. diff --git a/pandas/core/series.py b/pandas/core/series.py index 418b3fc8c57d0..d7c486ccefe1b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1030,6 +1030,36 @@ def axes(self): """ return [self.index] + # ---------------------------------------------------------------------- + # Indexing Methods + + @Appender(generic.NDFrame.take.__doc__) + def take(self, indices, axis=0, is_copy=False, **kwargs): + nv.validate_take(tuple(), kwargs) + + indices = ensure_platform_int(indices) + new_index = self.index.take(indices) + + if is_categorical_dtype(self): + # https://github.com/pandas-dev/pandas/issues/20664 + # TODO: remove when the default Categorical.take behavior changes + indices = maybe_convert_indices(indices, len(self._get_axis(axis))) + kwargs = {"allow_fill": False} + else: + kwargs = {} + new_values = self._values.take(indices, **kwargs) + + result = self._constructor( + new_values, index=new_index, fastpath=True + ).__finalize__(self) + + # Maybe set copy if we didn't actually change the index. + if is_copy: + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + + return result + def _ixs(self, i: int, axis: int = 0): """ Return the i-th value or values in the Series by location. @@ -1050,10 +1080,6 @@ def _ixs(self, i: int, axis: int = 0): else: return values[i] - @property - def _is_mixed_type(self): - return False - def _slice(self, slobj, axis=0, kind=None): slobj = self.index._convert_slice_indexer(slobj, kind=kind or "getitem") return self._get_values(slobj) @@ -1178,6 +1204,23 @@ def _get_values(self, indexer): except Exception: return self._values[indexer] + def _get_value(self, label, takeable: bool = False): + """ + Quickly retrieve single value at passed index label. + + Parameters + ---------- + label : object + takeable : interpret the index as indexers, default False + + Returns + ------- + scalar value + """ + if takeable: + return com.maybe_box_datetimelike(self._values[label]) + return self.index.get_value(self._values, label) + def __setitem__(self, key, value): key = com.apply_if_callable(key, self) @@ -1310,6 +1353,46 @@ def _set_values(self, key, value): self._data = self._data.setitem(indexer=key, value=value) self._maybe_update_cacher() + def _set_value(self, label, value, takeable: bool = False): + """ + Quickly set single value at passed label. + + If label is not contained, a new object is created with the label + placed at the end of the result index. + + Parameters + ---------- + label : object + Partial indexing with MultiIndex not allowed + value : object + Scalar value + takeable : interpret the index as indexers, default False + + Returns + ------- + Series + If label is contained, will be reference to calling Series, + otherwise a new object. + """ + try: + if takeable: + self._values[label] = value + else: + self.index._engine.set_value(self._values, label, value) + except (KeyError, TypeError): + + # set using a non-recursive method + self.loc[label] = value + + return self + + # ---------------------------------------------------------------------- + # Unsorted + + @property + def _is_mixed_type(self): + return False + def repeat(self, repeats, axis=None): """ Repeat elements of a Series. @@ -1367,56 +1450,6 @@ def repeat(self, repeats, axis=None): new_values = self._values.repeat(repeats) return self._constructor(new_values, index=new_index).__finalize__(self) - def _get_value(self, label, takeable: bool = False): - """ - Quickly retrieve single value at passed index label. - - Parameters - ---------- - label : object - takeable : interpret the index as indexers, default False - - Returns - ------- - scalar value - """ - if takeable: - return com.maybe_box_datetimelike(self._values[label]) - return self.index.get_value(self._values, label) - - def _set_value(self, label, value, takeable: bool = False): - """ - Quickly set single value at passed label. - - If label is not contained, a new object is created with the label - placed at the end of the result index. - - Parameters - ---------- - label : object - Partial indexing with MultiIndex not allowed - value : object - Scalar value - takeable : interpret the index as indexers, default False - - Returns - ------- - Series - If label is contained, will be reference to calling Series, - otherwise a new object. - """ - try: - if takeable: - self._values[label] = value - else: - self.index._engine.set_value(self._values, label, value) - except (KeyError, TypeError): - - # set using a non-recursive method - self.loc[label] = value - - return self - def reset_index(self, level=None, drop=False, name=None, inplace=False): """ Generate a new DataFrame or Series with the index reset. @@ -4384,33 +4417,6 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - @Appender(generic.NDFrame.take.__doc__) - def take(self, indices, axis=0, is_copy=False, **kwargs): - nv.validate_take(tuple(), kwargs) - - indices = ensure_platform_int(indices) - new_index = self.index.take(indices) - - if is_categorical_dtype(self): - # https://github.com/pandas-dev/pandas/issues/20664 - # TODO: remove when the default Categorical.take behavior changes - indices = maybe_convert_indices(indices, len(self._get_axis(axis))) - kwargs = {"allow_fill": False} - else: - kwargs = {} - new_values = self._values.take(indices, **kwargs) - - result = self._constructor( - new_values, index=new_index, fastpath=True - ).__finalize__(self) - - # Maybe set copy if we didn't actually change the index. - if is_copy: - if not result._get_axis(axis).equals(self._get_axis(axis)): - result._set_is_copy(self) - - return result - def isin(self, values): """ Check whether `values` are contained in Series. diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 2dbf4807cf144..3e44a7f941a86 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -447,6 +447,9 @@ def sp_maker(x, index=None): # always return a SparseArray! return clean + # ---------------------------------------------------------------------- + # Indexing Methods + def _get_value(self, index, col, takeable=False): """ Quickly retrieve single value at passed column and index @@ -470,34 +473,6 @@ def _get_value(self, index, col, takeable=False): return series._get_value(index, takeable=takeable) - def _set_value(self, index, col, value, takeable=False): - """ - Put single value at passed column and index - - Please use .at[] or .iat[] accessors. - - Parameters - ---------- - index : row label - col : column label - value : scalar value - takeable : interpret the index/col as indexers, default False - - Notes - ----- - This method *always* returns a new object. It is currently not - particularly efficient (and potentially very expensive) but is provided - for API compatibility with DataFrame - - Returns - ------- - frame : DataFrame - """ - dense = self.to_dense()._set_value(index, col, value, takeable=takeable) - return dense.to_sparse( - kind=self._default_kind, fill_value=self._default_fill_value - ) - def _slice(self, slobj, axis=0, kind=None): if axis == 0: new_index = self.index[slobj] @@ -529,6 +504,34 @@ def xs(self, key, axis=0, copy=False): data = self.take([i])._internal_get_values()[0] return Series(data, index=self.columns) + def _set_value(self, index, col, value, takeable=False): + """ + Put single value at passed column and index + + Please use .at[] or .iat[] accessors. + + Parameters + ---------- + index : row label + col : column label + value : scalar value + takeable : interpret the index/col as indexers, default False + + Notes + ----- + This method *always* returns a new object. It is currently not + particularly efficient (and potentially very expensive) but is provided + for API compatibility with DataFrame + + Returns + ------- + frame : DataFrame + """ + dense = self.to_dense()._set_value(index, col, value, takeable=takeable) + return dense.to_sparse( + kind=self._default_kind, fill_value=self._default_fill_value + ) + # ---------------------------------------------------------------------- # Arithmetic-related methods diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index f5d39c47150a2..1ebee1995bc29 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -310,6 +310,9 @@ def _set_subtyp(self, is_all_dates): else: object.__setattr__(self, "_subtyp", "sparse_series") + # ---------------------------------------------------------------------- + # Indexing Methods + def _ixs(self, i, axis=0): """ Return the i-th value or values in the SparseSeries by location @@ -340,52 +343,6 @@ def __getitem__(self, key): else: return super().__getitem__(key) - def _get_values(self, indexer): - try: - return self._constructor( - self._data.get_slice(indexer), fastpath=True - ).__finalize__(self) - except Exception: - return self[indexer] - - def _set_with_engine(self, key, value): - return self._set_value(key, value) - - def abs(self): - """ - Return an object with absolute value taken. Only applicable to objects - that are all numeric - - Returns - ------- - abs: same type as caller - """ - return self._constructor(np.abs(self.values), index=self.index).__finalize__( - self - ) - - def get(self, label, default=None): - """ - Returns value occupying requested label, default to specified - missing value if not present. Analogous to dict.get - - Parameters - ---------- - label : object - Label value looking for - default : object, optional - Value to return if label not in index - - Returns - ------- - y : scalar - """ - if label in self.index: - loc = self.index.get_loc(label) - return self._get_val_at(loc) - else: - return default - def _get_value(self, label, takeable=False): """ Retrieve single value at passed index label @@ -404,6 +361,17 @@ def _get_value(self, label, takeable=False): loc = label if takeable is True else self.index.get_loc(label) return self._get_val_at(loc) + def _get_values(self, indexer): + try: + return self._constructor( + self._data.get_slice(indexer), fastpath=True + ).__finalize__(self) + except Exception: + return self[indexer] + + def _set_with_engine(self, key, value): + return self._set_value(key, value) + def _set_value(self, label, value, takeable=False): """ Quickly set single value at passed label. If label is not contained, a @@ -457,6 +425,44 @@ def _set_values(self, key, value): values = SparseArray(values, fill_value=self.fill_value, kind=self.kind) self._data = SingleBlockManager(values, self.index) + # ---------------------------------------------------------------------- + # Unsorted + + def abs(self): + """ + Return an object with absolute value taken. Only applicable to objects + that are all numeric + + Returns + ------- + abs: same type as caller + """ + return self._constructor(np.abs(self.values), index=self.index).__finalize__( + self + ) + + def get(self, label, default=None): + """ + Returns value occupying requested label, default to specified + missing value if not present. Analogous to dict.get + + Parameters + ---------- + label : object + Label value looking for + default : object, optional + Value to return if label not in index + + Returns + ------- + y : scalar + """ + if label in self.index: + loc = self.index.get_loc(label) + return self._get_val_at(loc) + else: + return default + def to_dense(self): """ Convert SparseSeries to a Series.