Skip to content

Commit 38086f1

Browse files
authored
PERF: Allow ensure_index_from_sequences to return RangeIndex (#57786)
1 parent b9be19b commit 38086f1

File tree

3 files changed

+50
-26
lines changed

3 files changed

+50
-26
lines changed

pandas/core/indexes/base.py

+41-3
Original file line numberDiff line numberDiff line change
@@ -7154,6 +7154,43 @@ def shape(self) -> Shape:
71547154
return (len(self),)
71557155

71567156

7157+
def maybe_sequence_to_range(sequence) -> Any | range:
    """
    Convert a 1D, non-pandas sequence of integers to a range if possible.

    Returns the input unchanged if not possible.

    Parameters
    ----------
    sequence : 1D sequence
        Candidate values. Series, Index and range objects are returned
        as-is without being inspected.

    Returns
    -------
    Any : input or range
        A ``range`` when ``sequence`` is empty with an integer dtype, or
        holds at least two evenly spaced integers with a non-zero step;
        otherwise the original ``sequence`` object.
    """
    # A range is already in its most compact form; do not materialize it.
    if isinstance(sequence, (ABCSeries, Index, range)):
        return sequence

    np_sequence = np.asarray(sequence)
    # Only one-dimensional signed-integer data can be described by a range.
    if np_sequence.ndim != 1 or np_sequence.dtype.kind != "i":
        return sequence

    size = len(np_sequence)
    if size == 1:
        # A single value does not determine a step.
        return sequence
    if size == 0:
        return range(0)

    # Widen so every signed-integer dtype goes through the same int64 path.
    np_sequence = np_sequence.astype(np.int64, copy=False)

    # Use Python ints for start/step/stop: they cannot overflow, unlike
    # fixed-width NumPy scalars whose arithmetic wraps around.
    start = int(np_sequence[0])
    step = int(np_sequence[1]) - start
    if step == 0:
        return sequence
    stop = int(np_sequence[-1]) + step
    if size == 2:
        # Two distinct values are always evenly spaced.
        return range(start, stop, step)

    # Exact O(1) endpoint test. It rejects most non-ranges cheaply and, because
    # it is exact, it makes the wrapping element-wise comparison below sound.
    if stop - start != size * step:
        return sequence
    if (np.diff(np_sequence) == step).all():
        return range(start, stop, step)
    return sequence
7192+
7193+
71577194
def ensure_index_from_sequences(sequences, names=None) -> Index:
71587195
"""
71597196
Construct an index from sequences of data.
@@ -7172,8 +7209,8 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
71727209
71737210
Examples
71747211
--------
7175-
>>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
7176-
Index([1, 2, 3], dtype='int64', name='name')
7212+
>>> ensure_index_from_sequences([[1, 2, 4]], names=["name"])
7213+
Index([1, 2, 4], dtype='int64', name='name')
71777214
71787215
>>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
71797216
MultiIndex([('a', 'a'),
@@ -7189,8 +7226,9 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
71897226
if len(sequences) == 1:
71907227
if names is not None:
71917228
names = names[0]
7192-
return Index(sequences[0], name=names)
7229+
return Index(maybe_sequence_to_range(sequences[0]), name=names)
71937230
else:
7231+
# TODO: Apply maybe_sequence_to_range to sequences?
71947232
return MultiIndex.from_arrays(sequences, names=names)
71957233

71967234

pandas/core/indexes/range.py

+4-20
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
doc,
3030
)
3131

32-
from pandas.core.dtypes import missing
3332
from pandas.core.dtypes.base import ExtensionDtype
3433
from pandas.core.dtypes.common import (
3534
ensure_platform_int,
@@ -475,28 +474,13 @@ def _shallow_copy(self, values, name: Hashable = no_default):
475474
if values.dtype.kind == "i" and values.ndim == 1:
476475
# GH 46675 & 43885: If values is equally spaced, return a
477476
# more memory-compact RangeIndex instead of Index with 64-bit dtype
478-
if len(values) == 0:
479-
return type(self)._simple_new(_empty_range, name=name)
480-
elif len(values) == 1:
477+
if len(values) == 1:
481478
start = values[0]
482479
new_range = range(start, start + self.step, self.step)
483480
return type(self)._simple_new(new_range, name=name)
484-
diff = values[1] - values[0]
485-
if not missing.isna(diff) and diff != 0:
486-
if len(values) == 2:
487-
# Can skip is_range_indexer check
488-
new_range = range(values[0], values[-1] + diff, diff)
489-
return type(self)._simple_new(new_range, name=name)
490-
else:
491-
maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
492-
if (
493-
lib.is_range_indexer(
494-
maybe_range_indexer, len(maybe_range_indexer)
495-
)
496-
and not remainder.any()
497-
):
498-
new_range = range(values[0], values[-1] + diff, diff)
499-
return type(self)._simple_new(new_range, name=name)
481+
maybe_range = ibase.maybe_sequence_to_range(values)
482+
if isinstance(maybe_range, range):
483+
return type(self)._simple_new(maybe_range, name=name)
500484
return self._constructor._simple_new(values, name=name)
501485

502486
def _view(self) -> Self:

pandas/tests/indexes/test_base.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1514,8 +1514,10 @@ class TestIndexUtils:
15141514
@pytest.mark.parametrize(
15151515
"data, names, expected",
15161516
[
1517-
([[1, 2, 3]], None, Index([1, 2, 3])),
1518-
([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")),
1517+
([[1, 2, 4]], None, Index([1, 2, 4])),
1518+
([[1, 2, 4]], ["name"], Index([1, 2, 4], name="name")),
1519+
([[1, 2, 3]], None, RangeIndex(1, 4)),
1520+
([[1, 2, 3]], ["name"], RangeIndex(1, 4, name="name")),
15191521
(
15201522
[["a", "a"], ["c", "d"]],
15211523
None,
@@ -1530,7 +1532,7 @@ class TestIndexUtils:
15301532
)
15311533
def test_ensure_index_from_sequences(self, data, names, expected):
15321534
result = ensure_index_from_sequences(data, names)
1533-
tm.assert_index_equal(result, expected)
1535+
tm.assert_index_equal(result, expected, exact=True)
15341536

15351537
def test_ensure_index_mixed_closed_intervals(self):
15361538
# GH27172

0 commit comments

Comments
 (0)