From 7cb99e162eecca14850aedb29b3d19cffdce5574 Mon Sep 17 00:00:00 2001 From: makbigc Date: Tue, 19 Mar 2019 23:09:54 +0800 Subject: [PATCH 01/10] Get rid of _multiindex --- pandas/core/indexes/interval.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index ffbed7ab2006d..7a6593e4fb990 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -449,6 +449,16 @@ def is_monotonic_increasing(self): Return True if the IntervalIndex is monotonic increasing (only equal or increasing values), else False """ + values = [self.right, self.left] + + try: + sort_order = np.lexsort(values) + return Index(sort_order).is_monotonic + except TypeError: + + # we have mixed types and np.lexsort is not happy + return Index(self.values).is_monotonic + return self._multiindex.is_monotonic_increasing @cache_readonly @@ -457,7 +467,7 @@ def is_monotonic_decreasing(self): Return True if the IntervalIndex is monotonic decreasing (only equal or decreasing values), else False """ - return self._multiindex.is_monotonic_decreasing + return self[::-1].is_monotonic_increasing @cache_readonly def is_unique(self): From 0c54e55e2898e69ff454409f0cace271b73e0713 Mon Sep 17 00:00:00 2001 From: makbigc Date: Tue, 19 Mar 2019 23:10:23 +0800 Subject: [PATCH 02/10] Add benchmark test --- asv_bench/benchmarks/index_object.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index bbe164d4858ab..fbbedcdee3489 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -1,7 +1,7 @@ import numpy as np import pandas.util.testing as tm from pandas import (Series, date_range, DatetimeIndex, Index, RangeIndex, - Float64Index) + Float64Index, IntervalIndex) class SetOperations(object): @@ -181,4 +181,15 @@ def time_get_loc(self): self.ind.get_loc(0) 
+class IntervalIndexMethod(object): + # GH 24813 + def setup(self): + N = 10000 + left = np.append(np.arange(N), np.array(0)) + right = np.append(np.arange(1, N + 1), np.array(1)) + self.intv = IntervalIndex.from_arrays(left, right) + + def time_monotonic_inc(self): + self.intv.is_monotonic_increasing + from .pandas_vb_common import setup # noqa: F401 From e9f2601f957fa20fc4d2b78454bc5dc69dfa967d Mon Sep 17 00:00:00 2001 From: makbigc Date: Thu, 21 Mar 2019 19:04:46 +0800 Subject: [PATCH 03/10] Decrease test_monotonic_inc by one order and change is_monotonic --- asv_bench/benchmarks/index_object.py | 3 ++- pandas/core/indexes/interval.py | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index fbbedcdee3489..8860dce950ba9 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -184,7 +184,7 @@ def time_get_loc(self): class IntervalIndexMethod(object): # GH 24813 def setup(self): - N = 10000 + N = 10**5 left = np.append(np.arange(N), np.array(0)) right = np.append(np.arange(1, N + 1), np.array(1)) self.intv = IntervalIndex.from_arrays(left, right) @@ -192,4 +192,5 @@ def setup(self): def time_monotonic_inc(self): self.intv.is_monotonic_increasing + from .pandas_vb_common import setup # noqa: F401 diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 7a6593e4fb990..5498cb0621c18 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -441,7 +441,7 @@ def is_monotonic(self): Return True if the IntervalIndex is monotonic increasing (only equal or increasing values), else False """ - return self._multiindex.is_monotonic + return self.is_monotonic_increasing @cache_readonly def is_monotonic_increasing(self): @@ -459,8 +459,6 @@ def is_monotonic_increasing(self): # we have mixed types and np.lexsort is not happy return Index(self.values).is_monotonic - return 
self._multiindex.is_monotonic_increasing - @cache_readonly def is_monotonic_decreasing(self): """ From 8c5c580e236dcbf2d8fabc7372f4371a90e7273a Mon Sep 17 00:00:00 2001 From: makbigc Date: Sun, 24 Mar 2019 17:22:29 +0800 Subject: [PATCH 04/10] Remove the exception for the mixed type --- pandas/core/indexes/interval.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 5498cb0621c18..038ed2cabfa4b 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -451,13 +451,8 @@ def is_monotonic_increasing(self): """ values = [self.right, self.left] - try: - sort_order = np.lexsort(values) - return Index(sort_order).is_monotonic - except TypeError: - - # we have mixed types and np.lexsort is not happy - return Index(self.values).is_monotonic + sort_order = np.lexsort(values) + return Index(sort_order).is_monotonic @cache_readonly def is_monotonic_decreasing(self): From 543b0d38f7447e02ab498d94d8a290c32b002e90 Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 6 Apr 2019 14:12:18 +0800 Subject: [PATCH 05/10] Add a whatsnew note --- doc/source/whatsnew/v0.25.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index da712f84eb1b5..629d2cf592225 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -248,7 +248,8 @@ Performance Improvements - Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) - +- Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to 
:class:`MultiIndex` (:issue:`24813`) + .. _whatsnew_0250.bug_fixes: Bug Fixes From 0bba60204bd237361f7ab465e2ccb37ce4510cf1 Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 6 Apr 2019 14:24:35 +0800 Subject: [PATCH 06/10] Use algos.is_monotonic instead of Index instant --- pandas/core/indexes/interval.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 038ed2cabfa4b..571990fe413a7 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -7,6 +7,7 @@ from pandas._config import get_option from pandas._libs import Timedelta, Timestamp +from pandas._libs.algos import is_monotonic from pandas._libs.interval import Interval, IntervalMixin, IntervalTree from pandas.compat import add_metaclass from pandas.util._decorators import Appender, cache_readonly @@ -452,7 +453,7 @@ def is_monotonic_increasing(self): values = [self.right, self.left] sort_order = np.lexsort(values) - return Index(sort_order).is_monotonic + return is_monotonic(sort_order, False)[0] @cache_readonly def is_monotonic_decreasing(self): From ceb91f38cd112ef2c72da114157b26efe0e18afe Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 6 Apr 2019 14:52:35 +0800 Subject: [PATCH 07/10] Add one more bench --- asv_bench/benchmarks/index_object.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 8860dce950ba9..24018d150497e 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -183,13 +183,14 @@ def time_get_loc(self): class IntervalIndexMethod(object): # GH 24813 - def setup(self): - N = 10**5 + params = [10**3, 10**5] + + def setup(self, N): left = np.append(np.arange(N), np.array(0)) right = np.append(np.arange(1, N + 1), np.array(1)) self.intv = IntervalIndex.from_arrays(left, right) - def time_monotonic_inc(self): + def time_monotonic_inc(self, 
N): self.intv.is_monotonic_increasing From 2e6fa2890b36c17fadea7493878a3b5c090a1caa Mon Sep 17 00:00:00 2001 From: makbigc Date: Sun, 14 Apr 2019 22:50:37 +0800 Subject: [PATCH 08/10] Move is_monotonic_inc to IntervalTree --- pandas/_libs/intervaltree.pxi.in | 13 +++++++++++++ pandas/core/indexes/interval.py | 6 +----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 196841f35ed8d..ac713a928973f 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -4,6 +4,8 @@ Template for intervaltree WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ +from pandas._libs.algos import is_monotonic + ctypedef fused scalar_t: float64_t float32_t @@ -101,6 +103,17 @@ cdef class IntervalTree(IntervalMixin): return self._is_overlapping + @property + def is_monotonic_increasing(self): + """ + Return True if the IntervalTree is monotonic increasing (only equal or + increasing values), else False + """ + values = [self.right, self.left] + + sort_order = np.lexsort(values) + return is_monotonic(sort_order, False)[0] + def get_loc(self, scalar_t key): """Return all positions corresponding to intervals that overlap with the given scalar key diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 571990fe413a7..14c8ec6150dbd 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -7,7 +7,6 @@ from pandas._config import get_option from pandas._libs import Timedelta, Timestamp -from pandas._libs.algos import is_monotonic from pandas._libs.interval import Interval, IntervalMixin, IntervalTree from pandas.compat import add_metaclass from pandas.util._decorators import Appender, cache_readonly @@ -450,10 +449,7 @@ def is_monotonic_increasing(self): Return True if the IntervalIndex is monotonic increasing (only equal or increasing values), else False """ - values = [self.right, self.left] - - 
sort_order = np.lexsort(values) - return is_monotonic(sort_order, False)[0] + return self._engine.is_monotonic_increasing @cache_readonly def is_monotonic_decreasing(self): From 34ad0f291f4fe58f6dbc2564147da591ae1a09fc Mon Sep 17 00:00:00 2001 From: makbigc Date: Sun, 14 Apr 2019 23:14:57 +0800 Subject: [PATCH 09/10] Run _engine method in setup --- asv_bench/benchmarks/index_object.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 24018d150497e..c6b5d8912b4de 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -189,6 +189,7 @@ def setup(self, N): left = np.append(np.arange(N), np.array(0)) right = np.append(np.arange(1, N + 1), np.array(1)) self.intv = IntervalIndex.from_arrays(left, right) + self.intv._engine def time_monotonic_inc(self, N): self.intv.is_monotonic_increasing From 57f8404da255f261d1b2bee098911ae3e9aadd56 Mon Sep 17 00:00:00 2001 From: makbigc Date: Mon, 15 Apr 2019 20:56:58 +0800 Subject: [PATCH 10/10] Remove trailing space --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 629d2cf592225..7d80f2b5793ad 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -249,7 +249,7 @@ Performance Improvements - Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) - Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) - Improved performance of :meth:`IntervalIndex.is_monotonic`, :meth:`IntervalIndex.is_monotonic_increasing` and :meth:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) - + .. _whatsnew_0250.bug_fixes: Bug Fixes