diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 55a9eb6a0810a..263c4013de281 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2937,7 +2937,7 @@ def _ixs(self, i, axis=0): return self.loc[:, lab_slice] else: if isinstance(label, Index): - return self._take(i, axis=1) + return self.take(i, axis=1) index_len = len(self.index) @@ -2999,7 +2999,7 @@ def __getitem__(self, key): if getattr(indexer, "dtype", None) == bool: indexer = np.where(indexer)[0] - data = self._take(indexer, axis=1) + data = self.take(indexer, axis=1) if is_single_key: # What does looking for a single key in a non-unique index return? @@ -3032,7 +3032,7 @@ def _getitem_bool_array(self, key): # be reindexed to match DataFrame rows key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - return self._take(indexer, axis=0) + return self.take(indexer, axis=0) def _getitem_multilevel(self, key): loc = self.columns.get_loc(key) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e2253aed1c88..e19b1f70ce2f7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3294,7 +3294,7 @@ def _iget_item_cache(self, item): if ax.is_unique: lower = self._get_item_cache(ax[item]) else: - lower = self._take(item, axis=self._info_axis_number) + lower = self.take(item, axis=self._info_axis_number) return lower def _box_item_values(self, key, values): @@ -3522,52 +3522,6 @@ def __delitem__(self, key): except KeyError: pass - def _take(self, indices, axis=0, is_copy=True): - """ - Return the elements in the given *positional* indices along an axis. - - This means that we are not indexing according to actual values in - the index attribute of the object. We are indexing according to the - actual position of the element in the object. - - This is the internal version of ``.take()`` and will contain a wider - selection of parameters useful for internal use but not as suitable - for public usage. - - Parameters - ---------- - indices : array-like - An array of ints indicating which positions to take. - axis : int, default 0 - The axis on which to select elements. "0" means that we are - selecting rows, "1" means that we are selecting columns, etc. - is_copy : bool, default True - Whether to return a copy of the original object or not. - - Returns - ------- - taken : same type as caller - An array-like containing the elements taken from the object. - - See Also - -------- - numpy.ndarray.take - numpy.take - """ - self._consolidate_inplace() - - new_data = self._data.take( - indices, axis=self._get_block_manager_axis(axis), verify=True - ) - result = self._constructor(new_data).__finalize__(self) - - # Maybe set copy if we didn't actually change the index. - if is_copy: - if not result._get_axis(axis).equals(self._get_axis(axis)): - result._set_is_copy(self) - - return result - def take(self, indices, axis=0, is_copy=True, **kwargs): """ Return the elements in the given *positional* indices along an axis. @@ -3644,7 +3598,20 @@ class max_speed 3 lion mammal 80.5 """ nv.validate_take(tuple(), kwargs) - return self._take(indices, axis=axis, is_copy=is_copy) + + self._consolidate_inplace() + + new_data = self._data.take( + indices, axis=self._get_block_manager_axis(axis), verify=True + ) + result = self._constructor(new_data).__finalize__(self) + + # Maybe set copy if we didn't actually change the index. + if is_copy: + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + + return result def xs(self, key, axis=0, level=None, drop_level=True): """ @@ -3773,9 +3740,9 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: inds, = loc.nonzero() - return self._take(inds, axis=axis) + return self.take(inds, axis=axis) else: - return self._take(loc, axis=axis) + return self.take(loc, axis=axis) if not is_scalar(loc): new_index = self.index[loc] @@ -8091,7 +8058,7 @@ def at_time(self, time, asof=False, axis=None): except AttributeError: raise TypeError("Index must be DatetimeIndex") - return self._take(indexer, axis=axis) + return self.take(indexer, axis=axis) def between_time( self, start_time, end_time, include_start=True, include_end=True, axis=None @@ -8168,7 +8135,7 @@ def between_time( except AttributeError: raise TypeError("Index must be DatetimeIndex") - return self._take(indexer, axis=axis) + return self.take(indexer, axis=axis) def resample( self, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index aa71fd68086fb..9aba9723e0546 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -679,7 +679,7 @@ def get_group(self, name, obj=None): if not len(inds): raise KeyError(name) - return obj._take(inds, axis=self.axis) + return obj.take(inds, axis=self.axis) def __iter__(self): """ diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 3cf358261e685..a127d092b7b1a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -194,7 +194,7 @@ def _set_grouper(self, obj, sort=False): # use stable sort to support first, last, nth indexer = self.indexer = ax.argsort(kind="mergesort") ax = ax.take(indexer) - obj = obj._take(indexer, axis=self.axis, is_copy=False) + obj = obj.take(indexer, axis=self.axis, is_copy=False) self.obj = obj self.grouper = ax diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 33341a489866b..e341a66bb7459 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -675,7 +675,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)) indexer = get_group_index_sorter(group_index, ngroups) - obj = obj._take(indexer) + obj = obj.take(indexer) group_index = algorithms.take_nd(group_index, indexer, allow_fill=False) grouper = reduction.SeriesGrouper(obj, func, group_index, ngroups, dummy) result, counts = grouper.get_result() @@ -915,7 +915,7 @@ def __iter__(self): yield i, self._chop(sdata, slice(start, end)) def _get_sorted_data(self): - return self.data._take(self.sort_idx, axis=self.axis) + return self.data.take(self.sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 01f338a021cec..6040385acbe40 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1137,7 +1137,7 @@ def _getitem_iterable(self, key, axis: int): # A boolean indexer key = check_bool_indexer(labels, key) inds, = key.nonzero() - return self.obj._take(inds, axis=axis) + return self.obj.take(inds, axis=axis) else: # A collection of keys keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) @@ -1448,7 +1448,7 @@ def _getbool_axis(self, key, axis: int): key = check_bool_indexer(labels, key) inds, = key.nonzero() try: - return self.obj._take(inds, axis=axis) + return self.obj.take(inds, axis=axis) except Exception as detail: raise self._exception(detail) @@ -1469,7 +1469,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): else: # DatetimeIndex overrides Index.slice_indexer and may # return a DatetimeIndex instead of a slice object. - return self.obj._take(indexer, axis=axis) + return self.obj.take(indexer, axis=axis) class _LocIndexer(_LocationIndexer): @@ -2138,7 +2138,7 @@ def _get_list_axis(self, key, axis: int): Series object """ try: - return self.obj._take(key, axis=axis) + return self.obj.take(key, axis=axis) except IndexError: # re-raise with different error message raise IndexError("positional indexers are out-of-bounds") diff --git a/pandas/core/series.py b/pandas/core/series.py index acb0826953508..73a71a2a41f4c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4371,8 +4371,9 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - @Appender(generic.NDFrame._take.__doc__) - def _take(self, indices, axis=0, is_copy=False): + @Appender(generic.NDFrame.take.__doc__) + def take(self, indices, axis=0, is_copy=False, **kwargs): + nv.validate_take(tuple(), kwargs) indices = ensure_platform_int(indices) new_index = self.index.take(indices)