diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a6c92bf9faf9b..239c9b9fbe1c9 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1065,6 +1065,7 @@ Indexing - Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) - Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). +- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index b22e694c9fcca..9ed76242a95c3 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -5,7 +5,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ from numpy cimport ( - int64_t, int32_t, float64_t, float32_t, + int64_t, int32_t, float64_t, float32_t, uint64_t, ndarray, PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) import numpy as np @@ -24,6 +24,7 @@ ctypedef fused scalar_t: float32_t int64_t int32_t + uint64_t #---------------------------------------------------------------------- @@ -205,7 +206,7 @@ cdef sort_values_and_indices(all_values, all_indices, subset): {{py: nodes = [] -for dtype in ['float32', 'float64', 'int32', 'int64']: +for dtype in ['float32', 'float64', 'int32', 'int64', 'uint64']: for closed, cmp_left, cmp_right in [ ('left', '<=', '<'), ('right', '<', '<='), diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py index 343131125f640..056d3e1087a2e 100644 --- a/pandas/tests/indexes/interval/test_interval_tree.py +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -12,36 +12,37 @@ def closed(request): return request.param +@pytest.fixture( + scope='class', params=['int32', 'int64', 'float32', 'float64', 'uint64']) +def dtype(request): + return request.param + + +@pytest.fixture(scope='class') +def tree(dtype): + left = np.arange(5, dtype=dtype) + return IntervalTree(left, left + 2) + + class TestIntervalTree(object): - def setup_method(self, method): - def gentree(dtype): - left = np.arange(5, dtype=dtype) - right = left + 2 - return IntervalTree(left, right) - - self.tree = gentree('int64') - self.trees = {dtype: gentree(dtype) - for dtype in ['int32', 'int64', 'float32', 'float64']} - - def test_get_loc(self): - for dtype, tree in self.trees.items(): - tm.assert_numpy_array_equal(tree.get_loc(1), - np.array([0], dtype='int64')) - tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)), - np.array([0, 1], dtype='int64')) - with pytest.raises(KeyError): - tree.get_loc(-1) - - def test_get_indexer(self): - for dtype, tree in self.trees.items(): - tm.assert_numpy_array_equal( - tree.get_indexer(np.array([1.0, 5.5, 6.5])), - np.array([0, 4, -1], dtype='int64')) - with pytest.raises(KeyError): - tree.get_indexer(np.array([3.0])) - - def test_get_indexer_non_unique(self): - indexer, missing = self.tree.get_indexer_non_unique( + + def test_get_loc(self, tree): + tm.assert_numpy_array_equal(tree.get_loc(1), + np.array([0], dtype='int64')) + tm.assert_numpy_array_equal(np.sort(tree.get_loc(2)), + np.array([0, 1], dtype='int64')) + with pytest.raises(KeyError): + tree.get_loc(-1) + + def test_get_indexer(self, tree): + tm.assert_numpy_array_equal( + tree.get_indexer(np.array([1.0, 5.5, 6.5])), + np.array([0, 4, -1], dtype='int64')) + with pytest.raises(KeyError): + tree.get_indexer(np.array([3.0])) + + def test_get_indexer_non_unique(self, tree): + indexer, missing = tree.get_indexer_non_unique( np.array([1.0, 2.0, 6.5])) tm.assert_numpy_array_equal(indexer[:1], np.array([0], dtype='int64')) @@ -51,8 +52,9 @@ def test_get_indexer_non_unique(self): np.array([-1], dtype='int64')) tm.assert_numpy_array_equal(missing, np.array([2], dtype='int64')) - def test_duplicates(self): - tree = IntervalTree([0, 0, 0], [1, 1, 1]) + def test_duplicates(self, dtype): + left = np.array([0, 0, 0], dtype=dtype) + tree = IntervalTree(left, left + 1) tm.assert_numpy_array_equal(np.sort(tree.get_loc(0.5)), np.array([0, 1, 2], dtype='int64'))