Skip to content

Commit 7f32698

Browse files
committed
ENH: be more careful about performance when converting lists passed to _ensure_index (per #2336)
1 parent 400da7f commit 7f32698

File tree

4 files changed

+39
-10
lines changed

4 files changed

+39
-10
lines changed

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2249,7 +2249,8 @@ def lookup(self, row_labels, col_labels):
22492249

22502250
n = len(row_labels)
22512251
if n != len(col_labels):
2252-
raise AssertionError('Row labels must have same size as col labels')
2252+
raise AssertionError('Row labels must have same size as '
2253+
'column labels')
22532254

22542255
thresh = 1000
22552256
if not self._is_mixed_type or n > thresh:

pandas/core/index.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -2527,15 +2527,13 @@ def _ensure_index(index_like):
25272527
return Index(index_like, name=index_like.name)
25282528

25292529
if isinstance(index_like, list):
2530-
klasses = (list, np.ndarray)
2531-
all_arrays = all(isinstance(x, klasses) for x in index_like)
2530+
# #2200 ?
2531+
converted, all_arrays = lib.clean_index_list(index_like)
25322532

2533-
if len(index_like) > 0 and all_arrays:
2534-
return MultiIndex.from_arrays(index_like)
2533+
if len(converted) > 0 and all_arrays:
2534+
return MultiIndex.from_arrays(converted)
25352535
else:
2536-
# #2200 ?
2537-
index_like = [tuple(x) if isinstance(x, klasses) else x
2538-
for x in index_like]
2536+
index_like = converted
25392537

25402538
return Index(index_like)
25412539

pandas/src/tseries.pyx

+30
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,36 @@ def astype_intsafe(ndarray[object] arr, new_dtype):
707707

708708
return result
709709

710+
def clean_index_list(list obj):
711+
'''
712+
Utility used in pandas.core.index._ensure_index
713+
'''
714+
cdef:
715+
ndarray[object] converted
716+
Py_ssize_t i, n = len(obj)
717+
object v
718+
bint all_arrays = 1
719+
720+
for i in range(n):
721+
v = obj[i]
722+
if not (PyList_Check(v) or cnp.PyArray_Check(v)):
723+
all_arrays = 0
724+
break
725+
726+
if all_arrays:
727+
return obj, all_arrays
728+
729+
converted = np.empty(n, dtype=object)
730+
for i in range(n):
731+
v = obj[i]
732+
if PyList_Check(v) or cnp.PyArray_Check(v):
733+
converted[i] = tuple(v)
734+
else:
735+
converted[i] = v
736+
737+
return maybe_convert_objects(converted), 0
738+
739+
710740
include "hashtable.pyx"
711741
include "datetime.pyx"
712742
include "skiplist.pyx"

vb_suite/frame_methods.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
1414
row_labels = list(df.index[::10])[:900]
1515
col_labels = list(df.columns) * 100
16-
row_labels_all = list(df.index) * len(df.columns)
17-
col_labels_all = list(df.columns) * len(df.index)
16+
row_labels_all = np.array(list(df.index) * len(df.columns), dtype='object')
17+
col_labels_all = np.array(list(df.columns) * len(df.index), dtype='object')
1818
"""
1919

2020
frame_fancy_lookup = Benchmark('df.lookup(row_labels, col_labels)', setup,

0 commit comments

Comments
 (0)