From d701c7e21c6819cece6fe526243d81dddc990527 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 26 Mar 2024 12:28:25 -0700 Subject: [PATCH 1/5] PERF: Allow Index.to_frame to return RangeIndex columns --- pandas/core/indexes/base.py | 27 +++++++++++-------- pandas/tests/indexes/multi/test_conversion.py | 8 ++++++ pandas/tests/indexes/test_common.py | 7 +++++ 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 76dd19a9424f5..d0eda5cb51f6f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1374,16 +1374,19 @@ def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]: return attrs @final - def _get_level_names(self) -> Hashable | Sequence[Hashable]: + def _get_level_names(self) -> range | Sequence[Hashable]: """ Return a name or list of names with None replaced by the level number. """ if self._is_multi: - return [ - level if name is None else name for level, name in enumerate(self.names) - ] + return maybe_sequence_to_range( + [ + level if name is None else name + for level, name in enumerate(self.names) + ] + ) else: - return 0 if self.name is None else self.name + return range(1) if self.name is None else [self.name] @final def _mpl_repr(self) -> np.ndarray: @@ -1629,13 +1632,15 @@ def to_frame( """ from pandas import DataFrame - if name is lib.no_default: - name = self._get_level_names() - result = DataFrame({name: self}, copy=False) - if index: - result.index = self - return result + result_index = self + else: + result_index = None + if name is lib.no_default: + result_name = self._get_level_names() + else: + result_name = [name] + return DataFrame(self, index=result_index, columns=result_name, copy=False) # type: ignore[arg-type] # -------------------------------------------------------------------- # Name-Centric Methods diff --git a/pandas/tests/indexes/multi/test_conversion.py 
b/pandas/tests/indexes/multi/test_conversion.py index 3c2ca045d6f99..f6b10c989326f 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -5,6 +5,7 @@ from pandas import ( DataFrame, MultiIndex, + RangeIndex, ) import pandas._testing as tm @@ -148,6 +149,13 @@ def test_to_frame_duplicate_labels(): tm.assert_frame_equal(result, expected) +def test_to_frame_column_rangeindex(): + mi = MultiIndex.from_arrays([[1, 2], ["a", "b"]]) + result = mi.to_frame().columns + expected = RangeIndex(2) + tm.assert_index_equal(result, expected, exact=True) + + def test_to_flat_index(idx): expected = pd.Index( ( diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index eb0010066a7f6..4f3d9e321c8f4 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -508,3 +508,10 @@ def test_compare_read_only_array(): idx = pd.Index(arr) result = idx > 69 assert result.dtype == bool + + +def test_to_frame_column_rangeindex(): + idx = pd.Index([1]) + result = idx.to_frame().columns + expected = RangeIndex(1) + tm.assert_index_equal(result, expected, exact=True) From befc11cba330bce0c991776aa20523c01a8f7fe9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 26 Mar 2024 12:30:52 -0700 Subject: [PATCH 2/5] Add whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index fb33601263c5d..a6cd6d57c37f9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -289,6 +289,7 @@ Performance improvements - Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`) - Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the 
inputs (:issue:`57023`) - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`) +- Performance improvement in :meth:`Index.to_frame` returning :class:`RangeIndex` columns instead of :class:`Index` columns when possible. (:issue:`58018`) - Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`) - Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`) - Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`) From b77666adb2d9fe2f883eadf704b450175b11a2ca Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 26 Mar 2024 13:41:43 -0700 Subject: [PATCH 3/5] Avoid name mismatch --- pandas/core/indexes/base.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d0eda5cb51f6f..1e302ba238987 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1632,15 +1632,16 @@ def to_frame( """ from pandas import DataFrame - if index: - result_index = self - else: - result_index = None if name is lib.no_default: result_name = self._get_level_names() else: result_name = [name] - return DataFrame(self, index=result_index, columns=result_name, copy=False) # type: ignore[arg-type] + result = DataFrame(self, copy=False) + result.columns = result_name + + if index: + result.index = self + return result # -------------------------------------------------------------------- # Name-Centric Methods From 7bbaeb9a4d9eb380f9f16450c7f2ef1b67686242 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:24:42 -0700 Subject: [PATCH 4/5] Wrap in index, add test for name tuple --- pandas/core/indexes/base.py
| 2 +- pandas/tests/indexes/test_common.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1e302ba238987..6de6bdd292295 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1635,7 +1635,7 @@ def to_frame( if name is lib.no_default: result_name = self._get_level_names() else: - result_name = [name] + result_name = Index([name]) result = DataFrame(self, copy=False) result.columns = result_name diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 4f3d9e321c8f4..a2dee61295c74 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -515,3 +515,10 @@ def test_to_frame_column_rangeindex(): result = idx.to_frame().columns expected = RangeIndex(1) tm.assert_index_equal(result, expected, exact=True) + + +def test_to_frame_name_tuple_multiindex(): + idx = pd.Index([1]) + result = idx.to_frame(name=(1, 2)) + expected = pd.DataFrame([1], columns=MultiIndex.from_arrays([[1], [2]]), index=idx) + tm.assert_frame_equal(result, expected) From 65eb37376fff6908914bcb26912ace2ec5390dc0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 27 Mar 2024 17:12:10 -0700 Subject: [PATCH 5/5] Type ignore --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6de6bdd292295..e510d487ac954 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1635,7 +1635,7 @@ def to_frame( if name is lib.no_default: result_name = self._get_level_names() else: - result_name = Index([name]) + result_name = Index([name]) # type: ignore[assignment] result = DataFrame(self, copy=False) result.columns = result_name