From fbce54f75f6a62cbb6f5c7ec4f355de20eb34c7d Mon Sep 17 00:00:00 2001 From: makbigc Date: Tue, 5 Feb 2019 19:13:40 +0800 Subject: [PATCH 1/4] Remove MultiIndex conversion in is_unique --- pandas/core/indexes/interval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2c63fe33c57fe..5d5b749c5ff6f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -463,7 +463,7 @@ def is_unique(self): """ Return True if the IntervalIndex contains unique elements, else False """ - return self._multiindex.is_unique + return len(self) == len(self.unique()) @cache_readonly @Appender(_interval_shared_docs['is_non_overlapping_monotonic'] From 986ce9a63200a816ad983fc819a1768196d5d374 Mon Sep 17 00:00:00 2001 From: makbigc Date: Thu, 21 Mar 2019 22:25:57 +0800 Subject: [PATCH 2/4] Add benchmark --- asv_bench/benchmarks/index_object.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index bbe164d4858ab..0d72285685314 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -1,7 +1,7 @@ import numpy as np import pandas.util.testing as tm from pandas import (Series, date_range, DatetimeIndex, Index, RangeIndex, - Float64Index) + Float64Index, IntervalIndex) class SetOperations(object): @@ -181,4 +181,16 @@ def time_get_loc(self): self.ind.get_loc(0) +class IntervalIndexMethod(object): + # GH 24813 + def setup(self): + N = 10**5 + left = np.append(np.arange(N), np.array(0)) + right = np.append(np.arange(1, N + 1), np.array(1)) + self.intv = IntervalIndex.from_arrays(left, right) + + def time_is_unique(self): + self.intv.is_unique + + from .pandas_vb_common import setup # noqa: F401 From fda328b7c390139e1ad671d2ba380906c3c95b8e Mon Sep 17 00:00:00 2001 From: makbigc Date: Sun, 24 Mar 2019 21:02:28 +0800 Subject: [PATCH 3/4] New 
IntervalIndex.is_unique --- pandas/core/indexes/interval.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 5d5b749c5ff6f..d9ee5d9433a5b 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -463,7 +463,13 @@ def is_unique(self): """ Return True if the IntervalIndex contains unique elements, else False """ - return len(self) == len(self.unique()) + left = self.values.left + right = self.values.right + for i in range(len(self)): + mask = (left[i] == left) & (right[i] == right) + if mask.sum() > 1: + return False + return True @cache_readonly @Appender(_interval_shared_docs['is_non_overlapping_monotonic'] From b382b17397851410b774146cc57983c3e480039c Mon Sep 17 00:00:00 2001 From: makbigc Date: Sun, 24 Mar 2019 21:18:21 +0800 Subject: [PATCH 4/4] Add whatsnew entry --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ccf5c43280765..8a7d681a8f81f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -177,6 +177,7 @@ Performance Improvements - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) +- Improved performance of :meth:`IntervalIndex.is_unique` by removing conversion to ``MultiIndex`` (:issue:`24813`) .. _whatsnew_0250.bug_fixes: