Skip to content

Commit ded256d

Browse files
authored
PERF: DataFrame(dict) returns RangeIndex columns when possible (pandas-dev#57943)
* PERF: DataFrame(dict) returns RangeIndex columns when possible * add whatsnew note * Fix test failures * Only 1 ndim * Use infer_dtype * Skip EA, skipna=False
1 parent cf40e56 commit ded256d

File tree

6 files changed

+18
-19
lines changed

6 files changed

+18
-19
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ Removal of prior version deprecations/changes
267267
Performance improvements
268268
~~~~~~~~~~~~~~~~~~~~~~~~
269269
- :attr:`Categorical.categories` returns a :class:`RangeIndex` instead of an :class:`Index` if the constructed ``values`` was a ``range``. (:issue:`57787`)
270+
- :class:`DataFrame` returns :class:`RangeIndex` columns when possible if ``data`` is a ``dict`` (:issue:`57943`)
270271
- :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`)
271272
- :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of an :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`)
272273
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)

pandas/core/indexes/api.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import textwrap
43
from typing import (
54
TYPE_CHECKING,
65
cast,
@@ -23,6 +22,7 @@
2322
ensure_index,
2423
ensure_index_from_sequences,
2524
get_unanimous_names,
25+
maybe_sequence_to_range,
2626
)
2727
from pandas.core.indexes.category import CategoricalIndex
2828
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -34,16 +34,6 @@
3434

3535
if TYPE_CHECKING:
3636
from pandas._typing import Axis
37-
_sort_msg = textwrap.dedent(
38-
"""\
39-
Sorting because non-concatenation axis is not aligned. A future version
40-
of pandas will change to not sort by default.
41-
42-
To accept the future behavior, pass 'sort=False'.
43-
44-
To retain the current behavior and silence the warning, pass 'sort=True'.
45-
"""
46-
)
4737

4838

4939
__all__ = [
@@ -66,6 +56,7 @@
6656
"all_indexes_same",
6757
"default_index",
6858
"safe_sort_index",
59+
"maybe_sequence_to_range",
6960
]
7061

7162

pandas/core/indexes/base.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -7157,18 +7157,17 @@ def maybe_sequence_to_range(sequence) -> Any | range:
71577157
-------
71587158
Any : input or range
71597159
"""
7160-
if isinstance(sequence, (ABCSeries, Index, range)):
7160+
if isinstance(sequence, (ABCSeries, Index, range, ExtensionArray)):
71617161
return sequence
7162-
np_sequence = np.asarray(sequence)
7163-
if np_sequence.dtype.kind != "i" or len(np_sequence) == 1:
7162+
elif len(sequence) == 1 or lib.infer_dtype(sequence, skipna=False) != "integer":
71647163
return sequence
7165-
elif len(np_sequence) == 0:
7164+
elif len(sequence) == 0:
71667165
return range(0)
7167-
diff = np_sequence[1] - np_sequence[0]
7166+
diff = sequence[1] - sequence[0]
71687167
if diff == 0:
71697168
return sequence
7170-
elif len(np_sequence) == 2 or lib.is_sequence_range(np_sequence, diff):
7171-
return range(np_sequence[0], np_sequence[-1] + diff, diff)
7169+
elif len(sequence) == 2 or lib.is_sequence_range(np.asarray(sequence), diff):
7170+
return range(sequence[0], sequence[-1] + diff, diff)
71727171
else:
71737172
return sequence
71747173

pandas/core/internals/construction.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
default_index,
6161
ensure_index,
6262
get_objs_combined_axis,
63+
maybe_sequence_to_range,
6364
union_indexes,
6465
)
6566
from pandas.core.internals.blocks import (
@@ -403,7 +404,7 @@ def dict_to_mgr(
403404
arrays[i] = arr
404405

405406
else:
406-
keys = list(data.keys())
407+
keys = maybe_sequence_to_range(list(data.keys()))
407408
columns = Index(keys) if keys else default_index(0)
408409
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
409410

pandas/tests/frame/test_constructors.py

+5
Original file line numberDiff line numberDiff line change
@@ -2709,6 +2709,11 @@ def test_inference_on_pandas_objects(self):
27092709
result = DataFrame({"a": ser})
27102710
assert result.dtypes.iloc[0] == np.object_
27112711

2712+
def test_dict_keys_returns_rangeindex(self):
2713+
result = DataFrame({0: [1], 1: [2]}).columns
2714+
expected = RangeIndex(2)
2715+
tm.assert_index_equal(result, expected, exact=True)
2716+
27122717

27132718
class TestDataFrameConstructorIndexInference:
27142719
def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):

pandas/tests/reshape/test_pivot.py

+2
Original file line numberDiff line numberDiff line change
@@ -1738,6 +1738,7 @@ def test_daily(self):
17381738
mask = ts.index.year == y
17391739
expected[y] = Series(ts.values[mask], index=doy[mask])
17401740
expected = DataFrame(expected, dtype=float).T
1741+
expected.index = expected.index.astype(np.int32)
17411742
tm.assert_frame_equal(result, expected)
17421743

17431744
def test_monthly(self):
@@ -1753,6 +1754,7 @@ def test_monthly(self):
17531754
mask = ts.index.year == y
17541755
expected[y] = Series(ts.values[mask], index=month[mask])
17551756
expected = DataFrame(expected, dtype=float).T
1757+
expected.index = expected.index.astype(np.int32)
17561758
tm.assert_frame_equal(result, expected)
17571759

17581760
def test_pivot_table_with_iterator_values(self, data):

0 commit comments

Comments
 (0)