Skip to content

Commit acb9b33

Browse files
author
Mike Kelly
committed
PERF: Slowness in multi-level indexes with datetime levels
Special case handling of sliced multi indexes, where the length of the level values may exceed the length of the index. Corrected NaN handling issue introduced by the original change. (+1 squashed commit) Squashed commits: [0dae170] PERF: Slowness in multi-level indexes with datetime levels Special case handling of sliced multi indexes, where the length of the level values may exceed the length of the index.
1 parent 69e0f91 commit acb9b33

File tree

3 files changed

+33
-9
lines changed

3 files changed

+33
-9
lines changed

pandas/core/index.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -2937,11 +2937,16 @@ def values(self):
29372937

29382938
values = []
29392939
for lev, lab in zip(self.levels, self.labels):
2940-
lev_values = lev.values
29412940
# Need to box timestamps, etc.
2942-
if hasattr(lev, '_box_values'):
2943-
lev_values = lev._box_values(lev_values)
2944-
taken = com.take_1d(lev_values, lab)
2941+
box = hasattr(lev, '_box_values')
2942+
# Try to minimize boxing.
2943+
if box and len(lev) > len(lab):
2944+
taken = lev._box_values(com.take_1d(lev.values, lab))
2945+
elif box:
2946+
taken = com.take_1d(lev._box_values(lev.values), lab,
2947+
fill_value=_get_na_value(lev.dtype.type))
2948+
else:
2949+
taken = com.take_1d(lev.values, lab)
29452950
values.append(taken)
29462951

29472952
self._tuples = lib.fast_zip(values)

pandas/tests/test_index.py

+12
Original file line numberDiff line numberDiff line change
@@ -2335,6 +2335,18 @@ def test_from_product_datetimeindex(self):
23352335
(2, pd.Timestamp('2000-01-02'))])
23362336
assert_array_equal(mi.values, etalon)
23372337

2338+
def test_values_boxed(self):
2339+
tuples = [(1, pd.Timestamp('2000-01-01')),
2340+
(2, pd.NaT),
2341+
(3, pd.Timestamp('2000-01-03')),
2342+
(1, pd.Timestamp('2000-01-04')),
2343+
(2, pd.Timestamp('2000-01-02')),
2344+
(3, pd.Timestamp('2000-01-03'))]
2345+
mi = pd.MultiIndex.from_tuples(tuples)
2346+
assert_array_equal(mi.values, pd.lib.list_to_object_array(tuples))
2347+
# Check that code branches for boxed values produce identical results
2348+
assert_array_equal(mi.values[:4], mi[:4].values)
2349+
23382350
def test_append(self):
23392351
result = self.index[:3].append(self.index[3:])
23402352
self.assertTrue(result.equals(self.index))

vb_suite/index_object.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@
108108

109109

110110
# Constructing MultiIndex from cartesian product of iterables
111-
#
111+
#
112112

113113
setup = common_setup + """
114114
iterables = [tm.makeStringIndex(10000), xrange(20)]
@@ -123,10 +123,17 @@
123123

124124
setup = common_setup + """
125125
level1 = range(1000)
126-
level2 = date_range(start='1/1/2012', periods=10)
126+
level2 = date_range(start='1/1/2012', periods=100)
127+
mi = MultiIndex.from_product([level1, level2])
127128
"""
128129

129-
multiindex_with_datetime_level = \
130-
Benchmark("MultiIndex.from_product([level1, level2]).values", setup,
131-
name='multiindex_with_datetime_level',
130+
multiindex_with_datetime_level_full = \
131+
Benchmark("mi.copy().values", setup,
132+
name='multiindex_with_datetime_level_full',
133+
start_date=datetime(2014, 10, 11))
134+
135+
136+
multiindex_with_datetime_level_sliced = \
137+
Benchmark("mi[:10].values", setup,
138+
name='multiindex_with_datetime_level_sliced',
132139
start_date=datetime(2014, 10, 11))

0 commit comments

Comments
 (0)