Skip to content

Commit 74f527f

Browse files
gfyoungjreback
authored andcommitted
BUG: Check integrity of sparse int indices
The check_integrity method of IntIndex in pandas.sparse was un- implemented despite having documentation. This PR implements the method and calls it when initializing `IntIndex`. xref <a href="https://github.com/pandas- dev/pandas/pull/15844#discussion_r108840154">#15844 (comment)</a> Author: gfyoung <[email protected]> Closes #15863 from gfyoung/sparse-pyx-refactor and squashes the following commits: f435d28 [gfyoung] BUG: Check integrity of sparse int indices
1 parent d1e1ba0 commit 74f527f

File tree

3 files changed

+80
-7
lines changed

3 files changed

+80
-7
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ Sparse
10201020
- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`)
10211021
- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`)
10221022
- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`)
1023+
- Bug in sparse array indexing in which indices were not being validated (:issue:`15863`)
10231024

10241025
Reshaping
10251026
^^^^^^^^^

pandas/sparse/sparse.pyx

+41-7
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ cdef inline int int_min(int a, int b): return a if a <= b else b
3434

3535
cdef class SparseIndex:
3636
"""
37-
Abstract superclass for sparse index types
37+
Abstract superclass for sparse index types.
3838
"""
39+
3940
def __init__(self):
4041
raise NotImplementedError
4142

@@ -48,8 +49,9 @@ cdef class IntIndex(SparseIndex):
4849
----------
4950
length : integer
5051
indices : array-like
51-
Contains integers corresponding to
52+
Contains integers corresponding to the indices.
5253
"""
54+
5355
cdef readonly:
5456
Py_ssize_t length, npoints
5557
ndarray indices
@@ -59,9 +61,11 @@ cdef class IntIndex(SparseIndex):
5961
self.indices = np.ascontiguousarray(indices, dtype=np.int32)
6062
self.npoints = len(self.indices)
6163

64+
self.check_integrity()
65+
6266
def __reduce__(self):
6367
args = (self.length, self.indices)
64-
return (IntIndex, args)
68+
return IntIndex, args
6569

6670
def __repr__(self):
6771
output = 'IntIndex\n'
@@ -70,10 +74,40 @@ cdef class IntIndex(SparseIndex):
7074

7175
def check_integrity(self):
7276
"""
73-
Only need be strictly ascending and nothing less than 0 or greater than
74-
total length
77+
Checks the following:
78+
79+
- Indices are strictly ascending
80+
- Number of indices is at most self.length
81+
- Indices are at least 0 and at most the total length less one
82+
83+
A ValueError is raised if any of these conditions is violated.
7584
"""
76-
pass
85+
86+
cdef:
87+
int32_t index, prev = -1
88+
89+
if self.npoints > self.length:
90+
msg = ("Too many indices. Expected "
91+
"{exp} but found {act}").format(
92+
exp=self.length, act=self.npoints)
93+
raise ValueError(msg)
94+
95+
# Indices are vacuously ordered and non-negative
96+
# if the sequence of indices is empty.
97+
if self.npoints == 0:
98+
return
99+
100+
if min(self.indices) < 0:
101+
raise ValueError("No index can be less than zero")
102+
103+
if max(self.indices) >= self.length:
104+
raise ValueError("All indices must be less than the length")
105+
106+
for index in self.indices:
107+
if prev != -1 and index <= prev:
108+
raise ValueError("Indices must be strictly increasing")
109+
110+
prev = index
77111

78112
def equals(self, other):
79113
if not isinstance(other, IntIndex):
@@ -320,7 +354,7 @@ cdef class BlockIndex(SparseIndex):
320354

321355
def __reduce__(self):
322356
args = (self.length, self.blocs, self.blengths)
323-
return (BlockIndex, args)
357+
return BlockIndex, args
324358

325359
def __repr__(self):
326360
output = 'BlockIndex\n'

pandas/tests/sparse/test_libsparse.py

+38
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,44 @@ def test_to_block_index(self):
474474

475475
class TestIntIndex(tm.TestCase):
476476

477+
def test_check_integrity(self):
478+
479+
# Too many indices than specified in self.length
480+
msg = "Too many indices"
481+
482+
with tm.assertRaisesRegexp(ValueError, msg):
483+
IntIndex(length=1, indices=[1, 2, 3])
484+
485+
# No index can be negative.
486+
msg = "No index can be less than zero"
487+
488+
with tm.assertRaisesRegexp(ValueError, msg):
489+
IntIndex(length=5, indices=[1, -2, 3])
490+
491+
# No index can be negative.
492+
msg = "No index can be less than zero"
493+
494+
with tm.assertRaisesRegexp(ValueError, msg):
495+
IntIndex(length=5, indices=[1, -2, 3])
496+
497+
# All indices must be less than the length.
498+
msg = "All indices must be less than the length"
499+
500+
with tm.assertRaisesRegexp(ValueError, msg):
501+
IntIndex(length=5, indices=[1, 2, 5])
502+
503+
with tm.assertRaisesRegexp(ValueError, msg):
504+
IntIndex(length=5, indices=[1, 2, 6])
505+
506+
# Indices must be strictly ascending.
507+
msg = "Indices must be strictly increasing"
508+
509+
with tm.assertRaisesRegexp(ValueError, msg):
510+
IntIndex(length=5, indices=[1, 3, 2])
511+
512+
with tm.assertRaisesRegexp(ValueError, msg):
513+
IntIndex(length=5, indices=[1, 3, 3])
514+
477515
def test_int_internal(self):
478516
idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer')
479517
self.assertIsInstance(idx, IntIndex)

0 commit comments

Comments
 (0)