diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 26dd6f83ad44a..be7c3277759f5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -299,6 +299,7 @@ Performance improvements - Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`) - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`) - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) +- Performance improvement in :meth:`Index.to_frame` returning :class:`RangeIndex` columns instead of a :class:`Index` when possible. (:issue:`58018`) - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 76dd19a9424f5..e510d487ac954 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1374,16 +1374,19 @@ def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]: return attrs @final - def _get_level_names(self) -> Hashable | Sequence[Hashable]: + def _get_level_names(self) -> range | Sequence[Hashable]: """ Return a name or list of names with None replaced by the level number. 
""" if self._is_multi: - return [ - level if name is None else name for level, name in enumerate(self.names) - ] + return maybe_sequence_to_range( + [ + level if name is None else name + for level, name in enumerate(self.names) + ] + ) else: - return 0 if self.name is None else self.name + return range(1) if self.name is None else [self.name] @final def _mpl_repr(self) -> np.ndarray: @@ -1630,8 +1633,11 @@ def to_frame( from pandas import DataFrame if name is lib.no_default: - name = self._get_level_names() - result = DataFrame({name: self}, copy=False) + result_name = self._get_level_names() + else: + result_name = Index([name]) # type: ignore[assignment] + result = DataFrame(self, copy=False) + result.columns = result_name if index: result.index = self diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 3c2ca045d6f99..f6b10c989326f 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -5,6 +5,7 @@ from pandas import ( DataFrame, MultiIndex, + RangeIndex, ) import pandas._testing as tm @@ -148,6 +149,13 @@ def test_to_frame_duplicate_labels(): tm.assert_frame_equal(result, expected) +def test_to_frame_column_rangeindex(): + mi = MultiIndex.from_arrays([[1, 2], ["a", "b"]]) + result = mi.to_frame().columns + expected = RangeIndex(2) + tm.assert_index_equal(result, expected, exact=True) + + def test_to_flat_index(idx): expected = pd.Index( ( diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index eb0010066a7f6..a2dee61295c74 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -508,3 +508,17 @@ def test_compare_read_only_array(): idx = pd.Index(arr) result = idx > 69 assert result.dtype == bool + + +def test_to_frame_column_rangeindex(): + idx = pd.Index([1]) + result = idx.to_frame().columns + expected = RangeIndex(1) + tm.assert_index_equal(result, expected, 
exact=True) + + +def test_to_frame_name_tuple_multiindex(): + idx = pd.Index([1]) + result = idx.to_frame(name=(1, 2)) + expected = pd.DataFrame([1], columns=MultiIndex.from_arrays([[1], [2]]), index=idx) + tm.assert_frame_equal(result, expected)