-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: Allow ensure_index_from_sequence to return RangeIndex #57786
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
aa5589c
47af1ce
e7e0fcb
67f1998
676a1f4
e37500e
2cc41fc
7e1dac7
c7da6ac
90ff39c
66d3456
5c01d6a
7e6fcea
1ab4c1e
b5144f4
64918cf
08da810
8957995
b7e5dc1
f0592c5
33fa1f4
4015ac6
5fb9516
ff186c1
4e7cf98
b5b9a89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4235,7 +4235,6 @@ def join( | |
|
||
return self._join_via_get_indexer(other, how, sort) | ||
|
||
@final | ||
def _join_empty( | ||
self, other: Index, how: JoinHow, sort: bool | ||
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: | ||
|
@@ -7156,6 +7155,43 @@ def shape(self) -> Shape: | |
return (len(self),) | ||
|
||
|
||
def maybe_sequence_to_range(sequence) -> Any | range: | ||
""" | ||
Convert a 1D sequence to a range if possible. | ||
|
||
Returns the input if not possible. | ||
|
||
Parameters | ||
---------- | ||
sequence : 1D sequence | ||
names : sequence of str | ||
|
||
Returns | ||
------- | ||
Any : input or range | ||
""" | ||
if hasattr(sequence, "dtype") and not isinstance(sequence, np.ndarray): | ||
return sequence | ||
np_sequence = np.asarray(sequence) | ||
if np_sequence.dtype.kind != "i" or len(sequence) == 1: | ||
return sequence | ||
elif len(sequence) == 0: | ||
return range(0) | ||
diff = np_sequence[1] - np_sequence[0] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are only the first two elements important here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We're taking an example diff here (just between the first 2 elements) and later seeing if the rest of the diffs between the rest of the elements match |
||
if isna(diff) or diff == 0: | ||
return sequence | ||
elif len(sequence) == 2: | ||
return range(sequence[0], sequence[1] + diff, diff) | ||
maybe_range_indexer, remainder = np.divmod(np_sequence - np_sequence[0], diff) | ||
if ( | ||
lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer)) | ||
and not remainder.any() | ||
): | ||
return range(sequence[0], sequence[-1] + diff, diff) | ||
else: | ||
return sequence | ||
|
||
|
||
def ensure_index_from_sequences(sequences, names=None) -> Index: | ||
""" | ||
Construct an index from sequences of data. | ||
|
@@ -7174,8 +7210,8 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: | |
|
||
Examples | ||
-------- | ||
>>> ensure_index_from_sequences([[1, 2, 3]], names=["name"]) | ||
Index([1, 2, 3], dtype='int64', name='name') | ||
>>> ensure_index_from_sequences([[1, 2, 4]], names=["name"]) | ||
Index([1, 2, 4], dtype='int64', name='name') | ||
|
||
>>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) | ||
MultiIndex([('a', 'a'), | ||
|
@@ -7191,8 +7227,9 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: | |
if len(sequences) == 1: | ||
if names is not None: | ||
names = names[0] | ||
return Index(sequences[0], name=names) | ||
return Index(maybe_sequence_to_range(sequences[0]), name=names) | ||
else: | ||
# TODO: Apply maybe_sequence_to_range to sequences? | ||
return MultiIndex.from_arrays(sequences, names=names) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I find the duck typing here somewhat non-obvious - maybe comments will help? Is this first branch supposed to be for extension types?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Essentially yes, I think I can use a more specific check here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the check here to be more explicit (exclude Series and Index objects)