Skip to content

Commit df1dcc8

Browse files
committed
PERF: rework MultiIndex.is_monotonic as per @ssanderson's idea
1 parent 1bcf57e commit df1dcc8

File tree

3 files changed

+71
-14
lines changed

3 files changed

+71
-14
lines changed

asv_bench/benchmarks/indexing.py

+16-4
Original file line number | Diff line number | Diff line change
@@ -189,7 +189,9 @@ def setup(self):
189189
self.eps_C = 5
190190
self.eps_D = 5000
191191
self.mdt2 = self.mdt.set_index(['A', 'B', 'C', 'D']).sortlevel()
192-
self.miint = MultiIndex.from_product([np.arange(1000), np.arange(1000)], names=['one','two'])
192+
self.miint = MultiIndex.from_product(
193+
[np.arange(1000),
194+
np.arange(1000)], names=['one', 'two'])
193195

194196
def time_series_xs_mi_ix(self):
195197
self.s.ix[999]
@@ -198,11 +200,21 @@ def time_frame_xs_mi_ix(self):
198200
self.df.ix[999]
199201

200202
def time_multiindex_slicers(self):
201-
self.mdt2.loc[self.idx[(self.test_A - self.eps_A):(self.test_A + self.eps_A), (self.test_B - self.eps_B):(self.test_B + self.eps_B), (self.test_C - self.eps_C):(self.test_C + self.eps_C), (self.test_D - self.eps_D):(self.test_D + self.eps_D)], :]
203+
self.mdt2.loc[self.idx[
204+
(self.test_A - self.eps_A):(self.test_A + self.eps_A),
205+
(self.test_B - self.eps_B):(self.test_B + self.eps_B),
206+
(self.test_C - self.eps_C):(self.test_C + self.eps_C),
207+
(self.test_D - self.eps_D):(self.test_D + self.eps_D)], :]
202208

203209
def time_multiindex_get_indexer(self):
204-
self.miint.get_indexer(np.array([(0, 10), (0, 11), (0, 12), (0, 13), (0, 14),
205-
(0, 15), (0, 16),(0, 17), (0, 18), (0, 19)], dtype=object))
210+
self.miint.get_indexer(
211+
np.array([(0, 10), (0, 11), (0, 12),
212+
(0, 13), (0, 14), (0, 15),
213+
(0, 16), (0, 17), (0, 18),
214+
(0, 19)], dtype=object))
215+
216+
def time_is_monotonic(self):
217+
self.miint.is_monotonic
206218

207219

208220
class PanelIndexing(object):

pandas/indexes/multi.py

+14-8
Original file line number | Diff line number | Diff line change
@@ -667,15 +667,21 @@ def _has_complex_internals(self):
667667
@cache_readonly
668668
def is_monotonic(self):
669669

670-
# TODO
671-
# this is unfortunate we end up tupelizing
672-
# just to determine monotonicity :<
673-
674-
# fast-path
675-
if not self.levels[0].is_monotonic:
676-
return False
670+
def level_values(level):
671+
unique = self.levels[level]
672+
labels = self.labels[level]
673+
return algos.take_1d(unique.values, labels,
674+
fill_value=unique._na_value)
675+
676+
# reversed() because lexsort() wants the most significant key last.
677+
values = [level_values(i) for i in reversed(range(len(self.levels)))]
678+
try:
679+
sort_order = np.lexsort(values)
680+
return Index(sort_order).is_monotonic
681+
except TypeError:
677682

678-
return Index(self.values).is_monotonic
683+
# we have mixed types and np.lexsort is not happy
684+
return Index(self.values).is_monotonic
679685

680686
@cache_readonly
681687
def is_unique(self):

pandas/tests/indexes/test_multi.py

+41-2
Original file line number | Diff line number | Diff line change
@@ -2318,11 +2318,50 @@ def test_level_setting_resets_attributes(self):
23182318
ind = MultiIndex.from_arrays([
23192319
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
23202320
])
2321-
assert ind.is_monotonic
2321+
self.assertTrue(ind.is_monotonic)
23222322
ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
23232323
inplace=True)
2324+
23242325
# if this fails, probably didn't reset the cache correctly.
2325-
assert not ind.is_monotonic
2326+
self.assertFalse(ind.is_monotonic)
2327+
2328+
def test_is_monotonic(self):
2329+
i = MultiIndex.from_product([np.arange(10),
2330+
np.arange(10)], names=['one', 'two'])
2331+
self.assertTrue(i.is_monotonic)
2332+
self.assertTrue(Index(i.values).is_monotonic)
2333+
2334+
i = MultiIndex.from_product([np.arange(10, 0, -1),
2335+
np.arange(10)], names=['one', 'two'])
2336+
self.assertFalse(i.is_monotonic)
2337+
self.assertFalse(Index(i.values).is_monotonic)
2338+
2339+
i = MultiIndex.from_product([np.arange(10),
2340+
np.arange(10, 0, -1)],
2341+
names=['one', 'two'])
2342+
self.assertFalse(i.is_monotonic)
2343+
self.assertFalse(Index(i.values).is_monotonic)
2344+
2345+
i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
2346+
self.assertFalse(i.is_monotonic)
2347+
self.assertFalse(Index(i.values).is_monotonic)
2348+
2349+
# string ordering
2350+
i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
2351+
['one', 'two', 'three']],
2352+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
2353+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
2354+
names=['first', 'second'])
2355+
self.assertFalse(i.is_monotonic)
2356+
self.assertFalse(Index(i.values).is_monotonic)
2357+
2358+
i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
2359+
['mom', 'next', 'zenith']],
2360+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
2361+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
2362+
names=['first', 'second'])
2363+
self.assertTrue(i.is_monotonic)
2364+
self.assertTrue(Index(i.values).is_monotonic)
23262365

23272366
def test_isin(self):
23282367
values = [('foo', 2), ('bar', 3), ('quux', 4)]

0 commit comments

Comments
 (0)