Skip to content

PERF: Allow Index.to_frame to return RangeIndex columns #58018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 28, 2024
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ Performance improvements
- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
- Performance improvement in :meth:`Index.to_frame` returning :class:`RangeIndex` columns instead of :class:`Index` columns when possible. (:issue:`58018`)
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
Expand Down
20 changes: 13 additions & 7 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1374,16 +1374,19 @@ def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
return attrs

@final
def _get_level_names(self) -> Hashable | Sequence[Hashable]:
def _get_level_names(self) -> range | Sequence[Hashable]:
"""
Return a name or list of names with None replaced by the level number.
"""
if self._is_multi:
return [
level if name is None else name for level, name in enumerate(self.names)
]
return maybe_sequence_to_range(
[
level if name is None else name
for level, name in enumerate(self.names)
]
)
else:
return 0 if self.name is None else self.name
return range(1) if self.name is None else [self.name]

@final
def _mpl_repr(self) -> np.ndarray:
Expand Down Expand Up @@ -1630,8 +1633,11 @@ def to_frame(
from pandas import DataFrame

if name is lib.no_default:
name = self._get_level_names()
result = DataFrame({name: self}, copy=False)
result_name = self._get_level_names()
else:
result_name = [name]
result = DataFrame(self, copy=False)
result.columns = result_name

if index:
result.index = self
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/multi/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pandas import (
DataFrame,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm

Expand Down Expand Up @@ -148,6 +149,13 @@ def test_to_frame_duplicate_labels():
tm.assert_frame_equal(result, expected)


def test_to_frame_column_rangeindex():
mi = MultiIndex.from_arrays([[1, 2], ["a", "b"]])
result = mi.to_frame().columns
expected = RangeIndex(2)
tm.assert_index_equal(result, expected, exact=True)


def test_to_flat_index(idx):
expected = pd.Index(
(
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,10 @@ def test_compare_read_only_array():
idx = pd.Index(arr)
result = idx > 69
assert result.dtype == bool


def test_to_frame_column_rangeindex():
idx = pd.Index([1])
result = idx.to_frame().columns
expected = RangeIndex(1)
tm.assert_index_equal(result, expected, exact=True)