Skip to content

Commit ce4eef3

Browse files
authored
PERF: fix clean_index_list perf (#16295)
closes #16285
1 parent 0091810 commit ce4eef3

File tree

4 files changed

+18
-10
lines changed

4 files changed

+18
-10
lines changed

asv_bench/benchmarks/indexing.py

+3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def time_getitem_list_like(self):
1919
def time_getitem_array(self):
2020
self.s[np.arange(10000)]
2121

22+
def time_getitem_lists(self):
23+
self.s[np.arange(10000).tolist()]
24+
2225
def time_iloc_array(self):
2326
self.s.iloc[np.arange(10000)]
2427

doc/source/whatsnew/v0.20.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Enhancements
2626
Performance Improvements
2727
~~~~~~~~~~~~~~~~~~~~~~~~
2828

29+
- Performance regression fix when indexing with a list-like (:issue:`16285`)
2930

3031

3132
.. _whatsnew_0202.bug_fixes:

pandas/_libs/lib.pyx

+13-9
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,6 @@ def clean_index_list(list obj):
950950
Utility used in pandas.core.index._ensure_index
951951
"""
952952
cdef:
953-
ndarray[object] converted
954953
Py_ssize_t i, n = len(obj)
955954
object v
956955
bint all_arrays = 1
@@ -964,15 +963,20 @@ def clean_index_list(list obj):
964963
if all_arrays:
965964
return obj, all_arrays
966965

967-
converted = np.empty(n, dtype=object)
968-
for i in range(n):
969-
v = obj[i]
970-
if PyList_Check(v) or np.PyArray_Check(v) or hasattr(v, '_data'):
971-
converted[i] = tuple(v)
972-
else:
973-
converted[i] = v
966+
# don't force numpy to coerce when NaNs are present
967+
inferred = infer_dtype(obj)
968+
if inferred in ['string', 'bytes', 'unicode',
969+
'mixed', 'mixed-integer']:
970+
return np.asarray(obj, dtype=object), 0
971+
elif inferred in ['integer']:
972+
973+
# TODO: we infer an integer but it *could* be a uint64
974+
try:
975+
return np.asarray(obj, dtype='int64'), 0
976+
except OverflowError:
977+
return np.asarray(obj, dtype='object'), 0
974978

975-
return maybe_convert_objects(converted), 0
979+
return np.asarray(obj), 0
976980

977981

978982
ctypedef fused pandas_string:

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3960,7 +3960,7 @@ def _ensure_index(index_like, copy=False):
39603960
if isinstance(index_like, list):
39613961
if type(index_like) != list:
39623962
index_like = list(index_like)
3963-
# 2200 ?
3963+
39643964
converted, all_arrays = lib.clean_index_list(index_like)
39653965

39663966
if len(converted) > 0 and all_arrays:

0 commit comments

Comments
 (0)