Skip to content

Commit acf5933

Browse files
committed
PERF: Sparse ops speedup
1 parent 6afd5d2 commit acf5933

File tree

4 files changed

+162
-54
lines changed

4 files changed

+162
-54
lines changed

asv_bench/benchmarks/sparse.py

+49-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def time_sparse_series_to_coo(self):
5555
self.ss.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True)
5656

5757

58-
class sparse_arithmetic(object):
58+
class sparse_arithmetic_int(object):
5959
goal_time = 0.2
6060

6161
def setup(self):
@@ -75,6 +75,12 @@ def make_sparse_array(self, length, dense_size, fill_value):
7575
arr[indexer] = np.random.randint(0, 100, len(indexer))
7676
return pd.SparseArray(arr, fill_value=fill_value)
7777

78+
def time_sparse_make_union(self):
79+
self.a_10percent.sp_index.make_union(self.b_10percent.sp_index)
80+
81+
def time_sparse_intersect(self):
82+
self.a_10percent.sp_index.intersect(self.b_10percent.sp_index)
83+
7884
def time_sparse_addition_10percent(self):
7985
self.a_10percent + self.b_10percent
8086

@@ -92,3 +98,45 @@ def time_sparse_division_10percent_zero(self):
9298

9399
def time_sparse_division_1percent(self):
94100
self.a_1percent / self.b_1percent
101+
102+
103+
104+
class sparse_arithmetic_block(object):
    """Benchmarks for sparse index and arithmetic operations on blocky data.

    Every fixture is a 1,000,000-element ``pd.SparseArray`` whose non-fill
    values are written as 1000 runs of up to 10 consecutive positions at
    random offsets (runs may overlap, and a run starting near the end of the
    array is clipped).
    """

    goal_time = 0.2

    def setup(self):
        """Build one NaN-filled and one zero-filled pair of operands."""
        np.random.seed(1)
        self.a = self.make_sparse_array(length=1000000, num_blocks=1000,
                                        block_size=10, fill_value=np.nan)
        self.b = self.make_sparse_array(length=1000000, num_blocks=1000,
                                        block_size=10, fill_value=np.nan)

        # Both operands of the "*_zero" benchmarks must share fill_value=0;
        # otherwise those benchmarks time a mixed-fill operation instead of
        # the zero-fill path their names advertise.
        self.a_zero = self.make_sparse_array(length=1000000, num_blocks=1000,
                                             block_size=10, fill_value=0)
        self.b_zero = self.make_sparse_array(length=1000000, num_blocks=1000,
                                             block_size=10, fill_value=0)

    def make_sparse_array(self, length, num_blocks, block_size, fill_value):
        """Return a ``pd.SparseArray`` holding ``num_blocks`` random runs.

        Parameters
        ----------
        length : int
            Number of elements in the dense backing array.
        num_blocks : int
            How many runs of random integers (drawn from ``[0, 100)``) to
            write.
        block_size : int
            Maximum length of each run.
        fill_value : scalar
            Value used for the untouched positions and passed on as the
            sparse ``fill_value``.
        """
        # Allocate the constant array directly rather than through a
        # ``length``-element Python list; the dtype is pinned to whatever
        # NumPy infers for the scalar so the result matches list-based
        # construction.
        dense = np.full(length, fill_value,
                        dtype=np.array(fill_value).dtype)
        for _ in range(num_blocks):
            start = np.random.randint(0, length)
            stop = start + block_size
            # The slice is shorter than block_size when it runs off the end,
            # so size the random draw from the slice itself.
            dense[start:stop] = np.random.randint(0, 100,
                                                  len(dense[start:stop]))
        return pd.SparseArray(dense, fill_value=fill_value)

    def time_sparse_make_union(self):
        self.a.sp_index.make_union(self.b.sp_index)

    def time_sparse_intersect(self):
        self.a.sp_index.intersect(self.b.sp_index)

    def time_sparse_addition(self):
        self.a + self.b

    def time_sparse_addition_zero(self):
        self.a_zero + self.b_zero

    def time_sparse_division(self):
        self.a / self.b

    def time_sparse_division_zero(self):
        self.a_zero / self.b_zero

doc/source/whatsnew/v0.18.2.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ Deprecations
6666
Performance Improvements
6767
~~~~~~~~~~~~~~~~~~~~~~~~
6868

69-
69+
- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`)
70+
- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks is large, though using ``IntIndex`` is recommended in such cases (:issue:`13082`)
7071

7172

7273

pandas/sparse/tests/test_libsparse.py

+62-26
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,57 @@ def test_intindex_make_union(self):
186186
a.make_union(b)
187187

188188

189+
class TestSparseIndexIntersect(tm.TestCase):
    """Checks of ``intersect`` on both ``IntIndex`` and ``BlockIndex``."""

    def test_intersect(self):
        """Run the shared block cases through both index kinds."""
        # Bail out before doing any work; nothing above the original skip
        # had side effects, so skipping first is equivalent.
        if compat.is_platform_windows():
            raise nose.SkipTest("segfaults on win-64 when all tests are run")

        def _assert_intersection(left, right, wanted):
            assert left.intersect(right).equals(wanted)

        def _assert_length_mismatch_raises(index, longer):
            nose.tools.assert_raises(Exception, index.intersect, longer)

        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            x_block = BlockIndex(TEST_LENGTH, xloc, xlen)
            y_block = BlockIndex(TEST_LENGTH, yloc, ylen)
            wanted = BlockIndex(TEST_LENGTH, eloc, elen)
            # Same blocks as y, but declared over a longer total length.
            too_long = BlockIndex(TEST_LENGTH + 1, yloc, ylen)

            # Correct result, first as block indexes, then as int indexes.
            _assert_intersection(x_block, y_block, wanted)
            _assert_intersection(x_block.to_int_index(),
                                 y_block.to_int_index(),
                                 wanted.to_int_index())

            # Intersecting with an index of another length must raise.
            _assert_length_mismatch_raises(x_block, too_long)
            _assert_length_mismatch_raises(x_block.to_int_index(),
                                           too_long.to_int_index())

        check_cases(_check_case)

    def test_intersect_empty(self):
        """Intersecting with an empty index gives the empty index back."""

        def _assert_empty_both_ways(empty, populated):
            self.assertTrue(empty.intersect(populated).equals(empty))
            self.assertTrue(populated.intersect(empty).equals(empty))

        empty = IntIndex(4, np.array([], dtype=np.int32))
        populated = IntIndex(4, np.array([2, 3], dtype=np.int32))
        _assert_empty_both_ways(empty, populated)

        # Repeat with the block representation of the same two indexes.
        empty = empty.to_block_index()
        populated = populated.to_block_index()
        _assert_empty_both_ways(empty, populated)

    def test_intersect_identical(self):
        """An index intersected with itself equals itself."""
        position_sets = [(5, [1, 2]), (5, [0, 2, 4]), (0, []), (5, [])]
        cases = [IntIndex(length, np.array(points, dtype=np.int32))
                 for length, points in position_sets]

        for int_index in cases:
            self.assertTrue(int_index.intersect(int_index).equals(int_index))
            block_index = int_index.to_block_index()
            self.assertTrue(
                block_index.intersect(block_index).equals(block_index))
238+
239+
189240
class TestSparseIndexCommon(tm.TestCase):
190241

191242
_multiprocess_can_split_ = True
@@ -281,32 +332,6 @@ def _check(index):
281332
# corner cases
282333

283334

284-
def test_intersect():
285-
def _check_correct(a, b, expected):
286-
result = a.intersect(b)
287-
assert (result.equals(expected))
288-
289-
def _check_length_exc(a, longer):
290-
nose.tools.assert_raises(Exception, a.intersect, longer)
291-
292-
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
293-
xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
294-
yindex = BlockIndex(TEST_LENGTH, yloc, ylen)
295-
expected = BlockIndex(TEST_LENGTH, eloc, elen)
296-
longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen)
297-
298-
_check_correct(xindex, yindex, expected)
299-
_check_correct(xindex.to_int_index(), yindex.to_int_index(),
300-
expected.to_int_index())
301-
302-
_check_length_exc(xindex, longer_index)
303-
_check_length_exc(xindex.to_int_index(), longer_index.to_int_index())
304-
305-
if compat.is_platform_windows():
306-
raise nose.SkipTest("segfaults on win-64 when all tests are run")
307-
check_cases(_check_case)
308-
309-
310335
class TestBlockIndex(tm.TestCase):
311336

312337
_multiprocess_can_split_ = True
@@ -345,6 +370,16 @@ def test_block_internal(self):
345370
tm.assert_numpy_array_equal(idx.blengths,
346371
np.array([1, 2], dtype=np.int32))
347372

373+
def test_make_block_boundary(self):
    """Every second position must become its own length-1 block.

    Exercised for several total lengths, odd and even, so the last block
    lands both on and before the final position.
    """
    for total in [5, 10, 100, 101]:
        idx = _make_index(total,
                          np.arange(0, total, 2, dtype=np.int32),
                          kind='block')

        # Positions are never adjacent, so each one starts a new block.
        expected_locs = np.arange(0, total, 2, dtype=np.int32)
        expected_lens = np.ones(len(expected_locs), dtype=np.int32)

        tm.assert_numpy_array_equal(idx.blocs, expected_locs)
        tm.assert_numpy_array_equal(idx.blengths, expected_lens)
382+
348383
def test_equals(self):
349384
index = BlockIndex(10, [0, 4], [2, 5])
350385

@@ -413,6 +448,7 @@ def test_equals(self):
413448
self.assertFalse(index.equals(IntIndex(10, [0, 1, 2, 3])))
414449

415450
def test_to_block_index(self):
451+
416452
def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
417453
xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
418454
yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

pandas/src/sparse.pyx

+49-26
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,9 @@ cdef class IntIndex(SparseIndex):
9898

9999
cpdef IntIndex intersect(self, SparseIndex y_):
100100
cdef:
101-
Py_ssize_t out_length, xi, yi = 0
101+
Py_ssize_t out_length, xi, yi = 0, result_indexer = 0
102102
int32_t xind
103-
ndarray[int32_t, ndim=1] xindices, yindices
104-
list new_list = []
103+
ndarray[int32_t, ndim=1] xindices, yindices, new_indices
105104
IntIndex y
106105

107106
# if is one already, returns self
@@ -112,6 +111,7 @@ cdef class IntIndex(SparseIndex):
112111

113112
xindices = self.indices
114113
yindices = y.indices
114+
new_indices = np.empty(min(len(xindices), len(yindices)), dtype=np.int32)
115115

116116
for xi from 0 <= xi < self.npoints:
117117
xind = xindices[xi]
@@ -124,9 +124,11 @@ cdef class IntIndex(SparseIndex):
124124

125125
# TODO: would a two-pass algorithm be faster?
126126
if yindices[yi] == xind:
127-
new_list.append(xind)
127+
new_indices[result_indexer] = xind
128+
result_indexer += 1
128129

129-
return IntIndex(self.length, new_list)
130+
new_indices = new_indices[:result_indexer]
131+
return IntIndex(self.length, new_indices)
130132

131133
cpdef IntIndex make_union(self, SparseIndex y_):
132134

@@ -238,34 +240,42 @@ cdef class IntIndex(SparseIndex):
238240

239241
cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
    """
    Collapse integer positions into runs of consecutive values.

    A new run starts whenever a position exceeds its predecessor by more
    than one; otherwise the current run is extended by one.  The input is
    not validated here -- NOTE(review): callers presumably pass strictly
    increasing positions; confirm against the call sites.

    Parameters
    ----------
    indices : ndarray[int32_t, ndim=1]

    Returns
    -------
    locs, lens : tuple of int32 ndarrays
        Start position and length of each run.  Both arrays are empty when
        ``indices`` is empty.
    """
    cdef:
        Py_ssize_t i, npoints, result_indexer = 0
        int32_t block, length = 1, cur, prev
        ndarray[int32_t, ndim=1] locs, lens

    npoints = len(indices)

    # just handle the special empty case separately
    if npoints == 0:
        return np.array([], dtype=np.int32), np.array([], dtype=np.int32)

    # there can never be more blocks than points, so npoints slots suffice;
    # the unused tail is trimmed off before returning
    locs = np.empty(npoints, dtype=np.int32)
    lens = np.empty(npoints, dtype=np.int32)

    # TODO: two-pass algorithm faster?
    prev = block = indices[0]
    for i in range(1, npoints):
        cur = indices[i]
        if cur - prev > 1:
            # new block: flush the finished one and start over at cur
            locs[result_indexer] = block
            lens[result_indexer] = length
            block = cur
            length = 1
            result_indexer += 1
        else:
            # same block, increment length
            length += 1

        prev = cur

    # flush the block that was still open when the loop ended
    locs[result_indexer] = block
    lens[result_indexer] = length
    result_indexer += 1
    locs = locs[:result_indexer]
    lens = lens[:result_indexer]
    return locs, lens
270280

271281
#-------------------------------------------------------------------------------
@@ -398,12 +408,8 @@ cdef class BlockIndex(SparseIndex):
398408
"""
399409
cdef:
400410
BlockIndex y
401-
ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen
402-
403-
list out_blocs = []
404-
list out_blengths = []
405-
406-
Py_ssize_t xi = 0, yi = 0
411+
ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen
412+
Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0
407413
int32_t cur_loc, cur_length, diff
408414

409415
y = other.to_block_index()
@@ -416,6 +422,11 @@ cdef class BlockIndex(SparseIndex):
416422
yloc = y.blocs
417423
ylen = y.blengths
418424

425+
# block may be split, but can't exceed original len / 2 + 1
426+
max_len = int(min(self.length, y.length) / 2) + 1
427+
out_bloc = np.empty(max_len, dtype=np.int32)
428+
out_blen = np.empty(max_len, dtype=np.int32)
429+
419430
while True:
420431
# we are done (or possibly never began)
421432
if xi >= self.nblocks or yi >= y.nblocks:
@@ -458,10 +469,14 @@ cdef class BlockIndex(SparseIndex):
458469
cur_length = ylen[yi]
459470
yi += 1
460471

461-
out_blocs.append(cur_loc)
462-
out_blengths.append(cur_length)
472+
out_bloc[result_indexer] = cur_loc
473+
out_blen[result_indexer] = cur_length
474+
result_indexer += 1
463475

464-
return BlockIndex(self.length, out_blocs, out_blengths)
476+
out_bloc = out_bloc[:result_indexer]
477+
out_blen = out_blen[:result_indexer]
478+
479+
return BlockIndex(self.length, out_bloc, out_blen)
465480

466481
cpdef BlockIndex make_union(self, SparseIndex y):
467482
"""
@@ -626,15 +641,19 @@ cdef class BlockUnion(BlockMerge):
626641

627642
cdef _make_merged_blocks(self):
628643
cdef:
629-
ndarray[int32_t, ndim=1] xstart, xend, ystart, yend
644+
ndarray[int32_t, ndim=1] xstart, xend, ystart, yend, out_bloc, out_blen
630645
int32_t nstart, nend, diff
631-
list out_blocs = [], out_blengths = []
646+
Py_ssize_t max_len, result_indexer = 0
632647

633648
xstart = self.xstart
634649
xend = self.xend
635650
ystart = self.ystart
636651
yend = self.yend
637652

653+
max_len = int(min(self.x.length, self.y.length) / 2) + 1
654+
out_bloc = np.empty(max_len, dtype=np.int32)
655+
out_blen = np.empty(max_len, dtype=np.int32)
656+
638657
while True:
639658
# we are done (or possibly never began)
640659
if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks:
@@ -658,10 +677,14 @@ cdef class BlockUnion(BlockMerge):
658677
nstart = ystart[self.yi]
659678
nend = self._find_next_block_end(1)
660679

661-
out_blocs.append(nstart)
662-
out_blengths.append(nend - nstart)
680+
out_bloc[result_indexer] = nstart
681+
out_blen[result_indexer] = nend - nstart
682+
result_indexer += 1
683+
684+
out_bloc = out_bloc[:result_indexer]
685+
out_blen = out_blen[:result_indexer]
663686

664-
return BlockIndex(self.x.length, out_blocs, out_blengths)
687+
return BlockIndex(self.x.length, out_bloc, out_blen)
665688

666689
cdef int32_t _find_next_block_end(self, bint mode) except -1:
667690
"""

0 commit comments

Comments
 (0)