Skip to content

REF: stricter typing in Manager.take #51478

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3895,6 +3895,7 @@ class max_speed

return self._take(indices, axis)

@final
def _take(
self: NDFrameT,
indices,
Expand All @@ -3915,6 +3916,11 @@ def _take(
and is_range_indexer(indices, len(self))
):
return self.copy(deep=None)
else:
# We can get here with a slice via DataFrame.__geittem__
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo in getitem

Is this a column or row-based slice?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

column

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mind if i add the typo fix to my next Assorted CLN branch?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

id like to add a check for slice in __getitem__ before we get here to return self._slice(indexer) instead, but that may (probably should) cause us to return a view instead of copy.

Once that is changed, I'd like to deprecate allowing slice in DataFrame.take (which we don't allow in Series.take). It isn't documented, but still.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah fine by me.

Take returns a view for CoW even if it’s not a slice iirc. deprecating sounds good though.

indices = np.arange(
indices.start, indices.stop, indices.step, dtype=np.intp
)

new_data = self._mgr.take(
indices,
Expand Down
11 changes: 4 additions & 7 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,21 +630,18 @@ def _reindex_indexer(

def take(
self: T,
indexer,
indexer: npt.NDArray[np.intp],
axis: AxisInt = 1,
verify: bool = True,
convert_indices: bool = True,
) -> T:
"""
Take items along any axis.
"""
axis = self._normalize_axis(axis)
assert isinstance(indexer, np.ndarray), type(indexer)
assert indexer.dtype == np.intp, indexer.dtype

indexer = (
np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
if isinstance(indexer, slice)
else np.asanyarray(indexer, dtype="int64")
)
axis = self._normalize_axis(axis)

if not indexer.ndim == 1:
raise ValueError("indexer should be 1-dimensional")
Expand Down
12 changes: 4 additions & 8 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,15 +897,15 @@ def _make_na_block(

def take(
self: T,
indexer,
indexer: npt.NDArray[np.intp],
axis: AxisInt = 1,
verify: bool = True,
convert_indices: bool = True,
) -> T:
"""
Take items along any axis.

indexer : np.ndarray or slice
indexer : np.ndarray[np.intp]
axis : int, default 1
verify : bool, default True
Check that all entries are between 0 and len(self) - 1, inclusive.
Expand All @@ -917,12 +917,8 @@ def take(
-------
BlockManager
"""
# We have 6 tests that get here with a slice
indexer = (
np.arange(indexer.start, indexer.stop, indexer.step, dtype=np.intp)
if isinstance(indexer, slice)
else np.asanyarray(indexer, dtype=np.intp)
)
assert isinstance(indexer, np.ndarray), type(indexer)
assert indexer.dtype == np.intp, indexer.dtype

n = self.shape[axis]
if convert_indices:
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,13 +1003,15 @@ def assert_take_ok(mgr, axis, indexer):

for ax in range(mgr.ndim):
# take/fancy indexer
assert_take_ok(mgr, ax, indexer=[])
assert_take_ok(mgr, ax, indexer=[0, 0, 0])
assert_take_ok(mgr, ax, indexer=list(range(mgr.shape[ax])))
assert_take_ok(mgr, ax, indexer=np.array([], dtype=np.intp))
assert_take_ok(mgr, ax, indexer=np.array([0, 0, 0], dtype=np.intp))
assert_take_ok(
mgr, ax, indexer=np.array(list(range(mgr.shape[ax])), dtype=np.intp)
)

if mgr.shape[ax] >= 3:
assert_take_ok(mgr, ax, indexer=[0, 1, 2])
assert_take_ok(mgr, ax, indexer=[-1, -2, -3])
assert_take_ok(mgr, ax, indexer=np.array([0, 1, 2], dtype=np.intp))
assert_take_ok(mgr, ax, indexer=np.array([-1, -2, -3], dtype=np.intp))

@pytest.mark.parametrize("mgr", MANAGERS)
@pytest.mark.parametrize("fill_value", [None, np.nan, 100.0])
Expand Down