From 5654174297747041ab4dd94efa55294c23011fbf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:23:38 -0700 Subject: [PATCH 1/4] PERF: Series(dict) returns RangeIndex when possible --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/series.py | 3 ++- pandas/tests/series/test_constructors.py | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4debd41de213f..557c342f1981f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -297,6 +297,7 @@ Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - :attr:`Categorical.categories` returns a :class:`RangeIndex` columns instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`) - :class:`DataFrame` returns a :class:`RangeIndex` columns when possible when ``data`` is a ``dict`` (:issue:`57943`) +- :class:`Series` returns a :class:`RangeIndex` index when possible when ``data`` is a ``dict`` (:issue:`?`) - :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`) - :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) diff --git a/pandas/core/series.py b/pandas/core/series.py index ee496a355f6ca..702c939d8b5e4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -132,6 +132,7 @@ PeriodIndex, default_index, ensure_index, + maybe_sequence_to_range, ) import pandas.core.indexes.base as ibase from pandas.core.indexes.multi import maybe_droplevels @@ -547,7 +548,7 @@ def _init_dict( # using generators in effects the performance. # Below is the new way of extracting the keys and values - keys = tuple(data.keys()) + keys = maybe_sequence_to_range(tuple(data.keys())) values = list(data.values()) # Generating list of values- faster way elif index is not None: # fastpath for Series(data=None). Just use broadcasting a scalar diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 68737e86f0c6a..97faba532e94a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2251,3 +2251,9 @@ def test_series_with_complex_nan(input_list): result = Series(ser.array) assert ser.dtype == "complex128" tm.assert_series_equal(ser, result) + + +def test_dict_keys_rangeindex(): + result = Series({0: 1, 1: 2}) + expected = Series([1, 2], index=RangeIndex(2)) + tm.assert_series_equal(result, expected, check_index_type=True) From 7e489a9f0aecdc73afe3a49c1ab2fa2da9ef01f6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Apr 2024 10:24:31 -0700 Subject: [PATCH 2/4] Whatsnew number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 557c342f1981f..78ee359397df5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -297,7 +297,7 @@ Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - :attr:`Categorical.categories` returns a :class:`RangeIndex` columns instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`) - :class:`DataFrame` returns a :class:`RangeIndex` columns when possible when ``data`` is a ``dict`` (:issue:`57943`) -- :class:`Series` returns a :class:`RangeIndex` index when possible when ``data`` is a ``dict`` (:issue:`?`) +- :class:`Series` returns a :class:`RangeIndex` index when possible when ``data`` is a ``dict`` (:issue:`58118`) - :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`) - :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`) - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`) From 899e70e1f3a107f406bfa3dac6ec6ad7ad1c43fe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Apr 2024 11:17:25 -0700 Subject: [PATCH 3/4] Catch overflowerror in maybe_sequence_to_range --- pandas/core/indexes/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a4b58445289ad..73e564f95cf65 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7144,7 +7144,10 @@ def maybe_sequence_to_range(sequence) -> Any | range: return sequence if len(sequence) == 0: return range(0) - np_sequence = np.asarray(sequence, dtype=np.int64) + try: + np_sequence = np.asarray(sequence, dtype=np.int64) + except OverflowError: + return sequence diff = np_sequence[1] - np_sequence[0] if diff == 0: return sequence From c4c0ff311111079bead7002a47088054892a8564 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 2 Apr 2024 13:49:15 -0700 Subject: [PATCH 4/4] Remove annotation --- pandas/core/series.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 702c939d8b5e4..967aea15fff60 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -539,8 +539,6 @@ def _init_dict( _data : BlockManager for the new Series index : index for the new Series """ - keys: Index | tuple - # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')] # raises KeyError), so we iterate the entire dict, and align if data: