Skip to content

REF: de-duplicate NDFrame.take, remove Manager.take keyword #51482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 13 additions & 18 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3816,6 +3816,7 @@ def _clear_item_cache(self) -> None:
# ----------------------------------------------------------------------
# Indexing Methods

@final
def take(self: NDFrameT, indices, axis: Axis = 0, **kwargs) -> NDFrameT:
"""
Return the elements in the given *positional* indices along an axis.
Expand Down Expand Up @@ -3893,20 +3894,6 @@ class max_speed

nv.validate_take((), kwargs)

return self._take(indices, axis)

@final
def _take(
self: NDFrameT,
indices,
axis: Axis = 0,
convert_indices: bool_t = True,
) -> NDFrameT:
"""
Internal version of the `take` allowing specification of additional args.

See the docstring of `take` for full explanation of the parameters.
"""
if not isinstance(indices, slice):
indices = np.asarray(indices, dtype=np.intp)
if (
Expand All @@ -3916,8 +3903,14 @@ def _take(
and is_range_indexer(indices, len(self))
):
return self.copy(deep=None)
elif self.ndim == 1:
# TODO: be consistent here for DataFrame vs Series
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@phofl following this ill do do the deprecation we discussed

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good!

raise TypeError(
f"{type(self).__name__}.take requires a sequence of integers, "
"not slice."
)
else:
# We can get here with a slice via DataFrame.__geittem__
# We can get here with a slice via DataFrame.__getitem__
indices = np.arange(
indices.start, indices.stop, indices.step, dtype=np.intp
)
Expand All @@ -3926,21 +3919,23 @@ def _take(
indices,
axis=self._get_block_manager_axis(axis),
verify=True,
convert_indices=convert_indices,
)
return self._constructor(new_data).__finalize__(self, method="take")

@final
def _take_with_is_copy(self: NDFrameT, indices, axis: Axis = 0) -> NDFrameT:
"""
Internal version of the `take` method that sets the `_is_copy`
attribute to keep track of the parent dataframe (using in indexing
for the SettingWithCopyWarning).

For Series this does the same as the public take (it never sets `_is_copy`).

See the docstring of `take` for full explanation of the parameters.
"""
result = self._take(indices=indices, axis=axis)
result = self.take(indices=indices, axis=axis)
# Maybe set copy if we didn't actually change the index.
if not result._get_axis(axis).equals(self._get_axis(axis)):
if self.ndim == 2 and not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
return result

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1572,7 +1572,10 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT:
# GH#46209
# Don't convert indices: negative indices need to give rise
# to null values in the result
output = result._take(ids, axis=axis, convert_indices=False)
new_ax = result.axes[axis].take(ids)
output = result._reindex_with_indexers(
{axis: (new_ax, ids)}, allow_dups=True, copy=False
)
output = output.set_axis(obj._get_axis(self.axis), axis=axis)
return output

Expand Down
4 changes: 1 addition & 3 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,6 @@ def take(
indexer: npt.NDArray[np.intp],
axis: AxisInt = 1,
verify: bool = True,
convert_indices: bool = True,
) -> T:
"""
Take items along any axis.
Expand All @@ -647,8 +646,7 @@ def take(
raise ValueError("indexer should be 1-dimensional")

n = self.shape_proper[axis]
if convert_indices:
indexer = maybe_convert_indices(indexer, n, verify=verify)
indexer = maybe_convert_indices(indexer, n, verify=verify)

new_labels = self._axes[axis].take(indexer)
return self._reindex_indexer(
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,6 @@ def take(
indexer: npt.NDArray[np.intp],
axis: AxisInt = 1,
verify: bool = True,
convert_indices: bool = True,
) -> T:
"""
Take items along any axis.
Expand All @@ -920,8 +919,6 @@ def take(
verify : bool, default True
Check that all entries are between 0 and len(self) - 1, inclusive.
Pass verify=False if this check has been done by the caller.
convert_indices : bool, default True
Whether to attempt to convert indices to positive values.

Returns
-------
Expand All @@ -931,8 +928,7 @@ def take(
assert indexer.dtype == np.intp, indexer.dtype

n = self.shape[axis]
if convert_indices:
indexer = maybe_convert_indices(indexer, n, verify=verify)
indexer = maybe_convert_indices(indexer, n, verify=verify)

new_labels = self.axes[axis].take(indexer)
return self.reindex_indexer(
Expand Down
31 changes: 0 additions & 31 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@
maybe_cast_pointwise_result,
)
from pandas.core.dtypes.common import (
ensure_platform_int,
is_dict_like,
is_extension_array_dtype,
is_integer,
Expand Down Expand Up @@ -908,36 +907,6 @@ def axes(self) -> list[Index]:
# ----------------------------------------------------------------------
# Indexing Methods

@Appender(NDFrame.take.__doc__)
def take(self, indices, axis: Axis = 0, **kwargs) -> Series:
nv.validate_take((), kwargs)

indices = ensure_platform_int(indices)

if (
indices.ndim == 1
and using_copy_on_write()
and is_range_indexer(indices, len(self))
):
return self.copy(deep=None)

new_index = self.index.take(indices)
new_values = self._values.take(indices)

result = self._constructor(new_values, index=new_index, fastpath=True)
return result.__finalize__(self, method="take")

def _take_with_is_copy(self, indices, axis: Axis = 0) -> Series:
"""
Internal version of the `take` method that sets the `_is_copy`
attribute to keep track of the parent dataframe (using in indexing
for the SettingWithCopyWarning). For Series this does the same
as the public take (it never sets `_is_copy`).

See the docstring of `take` for full explanation of the parameters.
"""
return self.take(indices=indices, axis=axis)

def _ixs(self, i: int, axis: AxisInt = 0) -> Any:
"""
Return the i-th value or values in the Series by location.
Expand Down
14 changes: 11 additions & 3 deletions pandas/tests/series/indexing/test_take.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ def test_take():
expected = Series([4, 2, 4], index=[4, 3, 4])
tm.assert_series_equal(actual, expected)

msg = lambda x: f"index {x} is out of bounds for( axis 0 with)? size 5"
with pytest.raises(IndexError, match=msg(10)):
msg = "indices are out-of-bounds"
with pytest.raises(IndexError, match=msg):
ser.take([1, 10])
with pytest.raises(IndexError, match=msg(5)):
with pytest.raises(IndexError, match=msg):
ser.take([2, 5])


Expand All @@ -31,3 +31,11 @@ def test_take_categorical():
pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0]
)
tm.assert_series_equal(result, expected)


def test_take_slice_raises():
ser = Series([-1, 5, 6, 2, 4])

msg = "Series.take requires a sequence of integers, not slice"
with pytest.raises(TypeError, match=msg):
ser.take(slice(0, 3, 1))