From b72147e4544a2acea9dad673a0bccadd31454182 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 17 Aug 2017 12:35:38 -0600 Subject: [PATCH 1/6] FIX: Index._searchsorted_monotonic for decreasing indexes Fixed an issue where Index._searchsorted_monotonic(..., side='right') returns the left side position for monotonic decreasing indexes. Issue had a downstream effect on scalar lookups in IntervalIndex as well. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/test_base.py | 21 +++++++++++++++++++++ pandas/tests/indexing/test_interval.py | 9 +++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 636bb2dc3e60e..0f67b58c678a7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -418,6 +418,7 @@ Indexing - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) +- Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a9098126a38e3..ef5f68936044a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3465,7 +3465,7 @@ def _searchsorted_monotonic(self, label, side='left'): # everything for it to work (element ordering, search side and # resulting value). pos = self[::-1].searchsorted(label, side='right' if side == 'left' - else 'right') + else 'left') return len(self) - pos raise ValueError('index must be monotonic increasing or decreasing') diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f96dbdcfb8acf..d1bb0d9a9666a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2119,6 +2119,27 @@ def test_intersect_str_dates(self): assert len(res) == 0 + def test_searchsorted_monotonic(self): + # GH17271 + idx_inc = Index([0, 2, 4]) + idx_dec = Index([4, 2, 0]) + + # test included value. + assert idx_inc._searchsorted_monotonic(0, side='left') == 0 + assert idx_inc._searchsorted_monotonic(0, side='right') == 1 + assert idx_dec._searchsorted_monotonic(0, side='left') == 2 + assert idx_dec._searchsorted_monotonic(0, side='right') == 3 + + # test non-included value. + for side in ('left', 'right'): + assert idx_inc._searchsorted_monotonic(1, side=side) == 1 + assert idx_dec._searchsorted_monotonic(1, side=side) == 2 + + # non-monotonic should raise. + idx_bad = Index([0, 4, 2]) + with pytest.raises(ValueError): + idx_bad._searchsorted_monotonic(3) + class TestIndexUtils(object): diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index be6e5e1cffb2e..f6c67d8b12b7e 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -11,6 +11,9 @@ class TestIntervalIndex(object): def setup_method(self, method): self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + idx_dec = IntervalIndex.from_tuples([(2, 3), (1, 2), (0, 1)]) + self.s_dec = Series(list('abc'), idx_dec) + def test_loc_with_scalar(self): s = self.s @@ -39,6 +42,9 @@ def test_loc_with_scalar(self): expected = s.iloc[2:5] tm.assert_series_equal(expected, s.loc[s >= 2]) + # test endpoint of non-overlapping monotonic decreasing (GH16417) + assert self.s_dec.loc[3] == 'a' + def test_getitem_with_scalar(self): s = self.s @@ -67,6 +73,9 @@ def test_getitem_with_scalar(self): expected = s.iloc[2:5] tm.assert_series_equal(expected, s[s >= 2]) + # test endpoint of non-overlapping monotonic decreasing (GH16417) + assert self.s_dec[3] == 'a' + def test_with_interval(self): s = self.s From 2125c43e781ea1104de19c3c36a1d5f6fc956e8a Mon Sep 17 00:00:00 2001 From: jschendel Date: Sun, 20 Aug 2017 19:45:08 -0600 Subject: [PATCH 2/6] Review related changes --- pandas/tests/indexes/common.py | 23 ++++++- .../indexes/datetimes/test_datetimelike.py | 4 +- pandas/tests/indexes/period/test_period.py | 4 +- pandas/tests/indexes/test_base.py | 21 ------- pandas/tests/indexes/test_numeric.py | 12 ++-- pandas/tests/indexing/test_interval.py | 63 ++++++++++--------- 6 files changed, 71 insertions(+), 56 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1fdc08d68eb26..e9a4feb0d5c9b 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -632,7 +632,7 @@ def test_difference_base(self): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ - tm.assert_numpy_array_equal(result.asi8, answer.asi8) + assert tm.equalContents(result.asi8, answer.asi8) else: result = first.difference(case) assert tm.equalContents(result, answer) @@ -954,3 +954,24 @@ def test_join_self_unique(self, how): if index.is_unique: joined = index.join(index, how=how) assert (index == joined).all() + + def test_searchsorted_monotonic(self): + # GH17271 + for index in self.indices.values(): + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(index, (MultiIndex, IntervalIndex)): + continue + + # nothing to test if the index is empty + if index.empty: + continue + value = index[0] + + if index.is_monotonic_increasing or index.is_monotonic_decreasing: + assert index._searchsorted_monotonic(value, side='left') == 0 + assert index._searchsorted_monotonic(value, side='right') == 1 + else: + # non-monotonic should raise. + with pytest.raises(ValueError): + index._searchsorted_monotonic(value, side='left') diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 3b970ee382521..538e10e6011ec 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -12,7 +12,9 @@ class TestDatetimeIndex(DatetimeLike): _holder = DatetimeIndex def setup_method(self, method): - self.indices = dict(index=tm.makeDateIndex(10)) + self.indices = dict(index=tm.makeDateIndex(10), + index_dec=date_range('20130110', periods=10, + freq='-1D')) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e24e2ad936e2c..51f7d13cb0638 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -18,7 +18,9 @@ class TestPeriodIndex(DatetimeLike): _multiprocess_can_split_ = True def setup_method(self, method): - self.indices = dict(index=tm.makePeriodIndex(10)) + self.indices = dict(index=tm.makePeriodIndex(10), + index_dec=period_range('20130101', periods=10, + freq='D')[::-1]) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d1bb0d9a9666a..f96dbdcfb8acf 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2119,27 +2119,6 @@ def test_intersect_str_dates(self): assert len(res) == 0 - def test_searchsorted_monotonic(self): - # GH17271 - idx_inc = Index([0, 2, 4]) - idx_dec = Index([4, 2, 0]) - - # test included value. - assert idx_inc._searchsorted_monotonic(0, side='left') == 0 - assert idx_inc._searchsorted_monotonic(0, side='right') == 1 - assert idx_dec._searchsorted_monotonic(0, side='left') == 2 - assert idx_dec._searchsorted_monotonic(0, side='right') == 3 - - # test non-included value. - for side in ('left', 'right'): - assert idx_inc._searchsorted_monotonic(1, side=side) == 1 - assert idx_dec._searchsorted_monotonic(1, side=side) == 2 - - # non-monotonic should raise. - idx_bad = Index([0, 4, 2]) - with pytest.raises(ValueError): - idx_bad._searchsorted_monotonic(3) - class TestIndexUtils(object): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1a0a38c173284..7e7e10e4aeabe 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -181,7 +181,9 @@ class TestFloat64Index(Numeric): def setup_method(self, method): self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), - float=Float64Index(np.arange(5) * 2.5)) + float=Float64Index(np.arange(5) * 2.5), + mixed_dec=Float64Index([5, 4, 3, 2, 1.5]), + float_dec=Float64Index(np.arange(4, -1, -1) * 2.5)) self.setup_indices() def create_index(self): @@ -654,7 +656,8 @@ class TestInt64Index(NumericInt): _holder = Int64Index def setup_method(self, method): - self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) + self.indices = dict(index=Int64Index(np.arange(0, 20, 2)), + index_dec=Int64Index(np.arange(19, -1, -1))) self.setup_indices() def create_index(self): @@ -949,8 +952,9 @@ class TestUInt64Index(NumericInt): _holder = UInt64Index def setup_method(self, method): - self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, - 2**63 + 20, 2**63 + 25])) + vals = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + self.indices = dict(index=UInt64Index(vals), + index_dec=UInt64Index(reversed(vals))) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index f6c67d8b12b7e..31a94abcd99a5 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -3,6 +3,7 @@ import pandas as pd from pandas import Series, DataFrame, IntervalIndex, Interval +from pandas.compat import product import pandas.util.testing as tm @@ -11,22 +12,9 @@ class TestIntervalIndex(object): def setup_method(self, method): self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) - idx_dec = IntervalIndex.from_tuples([(2, 3), (1, 2), (0, 1)]) - self.s_dec = Series(list('abc'), idx_dec) - def test_loc_with_scalar(self): s = self.s - expected = 0 - - result = s.loc[0.5] - assert result == expected - - result = s.loc[1] - assert result == expected - - with pytest.raises(KeyError): - s.loc[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s.loc[:3]) @@ -42,22 +30,9 @@ def test_loc_with_scalar(self): expected = s.iloc[2:5] tm.assert_series_equal(expected, s.loc[s >= 2]) - # test endpoint of non-overlapping monotonic decreasing (GH16417) - assert self.s_dec.loc[3] == 'a' - def test_getitem_with_scalar(self): s = self.s - expected = 0 - - result = s[0.5] - assert result == expected - - result = s[1] - assert result == expected - - with pytest.raises(KeyError): - s[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s[:3]) @@ -73,8 +48,40 @@ def test_getitem_with_scalar(self): expected = s.iloc[2:5] tm.assert_series_equal(expected, s[s >= 2]) - # test endpoint of non-overlapping monotonic decreasing (GH16417) - assert self.s_dec[3] == 'a' + @pytest.mark.parametrize('direction, closed', + product(('increasing', 'decreasing'), + ('left', 'right', 'neither', 'both'))) + def test_nonoverlapping_monotonic(self, direction, closed): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == 'decreasing': + tpls = reversed(tpls) + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + s = Series(list('abc'), idx) + + for key, expected in zip(idx.left, s): + if idx.closed_left: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.right, s): + if idx.closed_right: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.mid, s): + assert s[key] == expected + assert s.loc[key] == expected def test_with_interval(self): From b857acaa3f05a531e8f1483f3bf6ee68c75f40e7 Mon Sep 17 00:00:00 2001 From: jschendel Date: Sun, 20 Aug 2017 20:15:56 -0600 Subject: [PATCH 3/6] Additional review changes --- pandas/tests/indexes/common.py | 5 ++++- pandas/tests/indexes/test_range.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e9a4feb0d5c9b..393d09ee54238 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -551,6 +551,8 @@ def test_setops_errorcases(self): "or array-like", method, case) + @pytest.mark.xfail(reason='intersection fails for monotonic decreasing ' + 'RangeIndex (GH 17296)') def test_intersection_base(self): for name, idx in compat.iteritems(self.indices): first = idx[:5] @@ -632,7 +634,8 @@ def test_difference_base(self): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ - assert tm.equalContents(result.asi8, answer.asi8) + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) else: result = first.difference(case) assert tm.equalContents(result, answer) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 06c8f0ee392c7..d206c36ee51c9 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -25,7 +25,8 @@ class TestRangeIndex(Numeric): _compat_props = ['shape', 'ndim', 'size', 'itemsize'] def setup_method(self, method): - self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) + self.indices = dict(index=RangeIndex(0, 20, 2, name='foo'), + index_dec=RangeIndex(18, -1, -2, name='bar')) self.setup_indices() def create_index(self): From 783f38f60106d5b48860e877744abcd5d4e1b0ad Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 24 Aug 2017 22:24:10 -0600 Subject: [PATCH 4/6] Test searchsorted and misc changes --- pandas/tests/indexes/common.py | 41 +++++++++++++++++++++++++------ pandas/tests/indexes/test_base.py | 3 ++- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 393d09ee54238..aa25b237c165d 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -138,9 +138,10 @@ def test_get_indexer_consistency(self): if isinstance(index, IntervalIndex): continue - indexer = index.get_indexer(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp + if index.is_unique: + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp indexer, _ = index.get_indexer_non_unique(index[0:2]) assert isinstance(indexer, np.ndarray) @@ -551,8 +552,6 @@ def test_setops_errorcases(self): "or array-like", method, case) - @pytest.mark.xfail(reason='intersection fails for monotonic decreasing ' - 'RangeIndex (GH 17296)') def test_intersection_base(self): for name, idx in compat.iteritems(self.indices): first = idx[:5] @@ -561,6 +560,9 @@ def test_intersection_base(self): if isinstance(idx, CategoricalIndex): pass + elif isinstance(idx, RangeIndex): + pytest.xfail(reason='intersection fails for decreasing ' + 'RangeIndex (GH 17296)') else: assert tm.equalContents(intersect, second) @@ -971,9 +973,32 @@ def test_searchsorted_monotonic(self): continue value = index[0] - if index.is_monotonic_increasing or index.is_monotonic_decreasing: - assert index._searchsorted_monotonic(value, side='left') == 0 - assert index._searchsorted_monotonic(value, side='right') == 1 + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (index == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(index) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if index.is_monotonic_increasing: + ssm_left = index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = index.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = index.searchsorted(value, side='right') + assert expected_right == ss_right + elif index.is_monotonic_decreasing: + ssm_left = index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right else: # non-monotonic should raise. with pytest.raises(ValueError): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f96dbdcfb8acf..d69fbbcdf4bf6 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -46,7 +46,8 @@ def setup_method(self, method): catIndex=tm.makeCategoricalIndex(100), empty=Index([]), tuples=MultiIndex.from_tuples(lzip( - ['foo', 'bar', 'baz'], [1, 2, 3]))) + ['foo', 'bar', 'baz'], [1, 2, 3])), + repeats=Index([0, 0, 1, 1, 2, 2])) self.setup_indices() def create_index(self): From e078f4c9fcb5ff2a95510e56e4bcb68587763ce0 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 25 Aug 2017 12:51:59 -0600 Subject: [PATCH 5/6] Verify non-unique index --- pandas/tests/indexes/common.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index aa25b237c165d..8b9ea3699770c 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -11,6 +11,7 @@ RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, notna, isna) +from pandas.core.indexes.base import InvalidIndexError from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion from pandas._libs.tslib import iNaT @@ -138,10 +139,14 @@ def test_get_indexer_consistency(self): if isinstance(index, IntervalIndex): continue - if index.is_unique: + if index.is_unique or isinstance(index, CategoricalIndex): indexer = index.get_indexer(index[0:2]) assert isinstance(indexer, np.ndarray) assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = index.get_indexer(index[0:2]) indexer, _ = index.get_indexer_non_unique(index[0:2]) assert isinstance(indexer, np.ndarray) From 41e59d7f9947518f85dc2c38ac9da7ac6eaec64b Mon Sep 17 00:00:00 2001 From: jschendel Date: Wed, 6 Sep 2017 20:36:12 -0600 Subject: [PATCH 6/6] Rebase and remove xfail Removed xfail, as the issue impacting the failing test has been resolved. Rebase to address merge conflicts. --- pandas/tests/indexes/common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8b9ea3699770c..90618cd6e235f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -565,9 +565,6 @@ def test_intersection_base(self): if isinstance(idx, CategoricalIndex): pass - elif isinstance(idx, RangeIndex): - pytest.xfail(reason='intersection fails for decreasing ' - 'RangeIndex (GH 17296)') else: assert tm.equalContents(intersect, second)