Skip to content

Commit 33ca281

Browse files
committed
TST: unit testing and buglets in SparseArray, #436
1 parent 17151ca commit 33ca281

File tree

9 files changed

+130
-45
lines changed

9 files changed

+130
-45
lines changed

pandas/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@
2020
from pandas.info import __doc__
2121

2222
from pandas.core.api import *
23+
from pandas.sparse.api import *
24+
from pandas.stats.api import *
25+
2326
from pandas.core.common import set_printoptions
2427
from pandas.core.common import set_eng_float_format
2528
from pandas.io.parsers import read_csv, read_table, read_clipboard, ExcelFile
2629
from pandas.io.pytables import HDFStore
27-
from pandas.stats.api import *
2830
from pandas.util.testing import debug
2931

3032
from pandas.tools.pivot import pivot_table
3133

32-
from pandas.sparse.api import (SparseArray, SparseSeries, SparseDataFrame,
33-
SparsePanel)

pandas/sparse/api.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
# pylint: disable=W0611
2+
13
from pandas.sparse.array import SparseArray
24
from pandas.sparse.list import SparseList
3-
from pandas.sparse.series import SparseSeries
5+
from pandas.sparse.series import SparseSeries, SparseTimeSeries
46
from pandas.sparse.frame import SparseDataFrame
57
from pandas.sparse.panel import SparsePanel

pandas/sparse/array.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -200,19 +200,21 @@ def __repr__(self):
200200
__rfloordiv__ = _sparse_op_wrap(lambda x, y: y // x, 'floordiv')
201201
__rpow__ = _sparse_op_wrap(lambda x, y: y ** x, '__rpow__')
202202

203+
def disable(self, other):
204+
raise NotImplementedError('inplace binary ops not supported')
203205
# Inplace operators
204-
__iadd__ = __add__
205-
__isub__ = __sub__
206-
__imul__ = __mul__
207-
__itruediv__ = __truediv__
208-
__ifloordiv__ = __floordiv__
209-
__ipow__ = __pow__
206+
__iadd__ = disable
207+
__isub__ = disable
208+
__imul__ = disable
209+
__itruediv__ = disable
210+
__ifloordiv__ = disable
211+
__ipow__ = disable
210212

211213
# Python 2 division operators
212214
if not py3compat.PY3:
213215
__div__ = _sparse_op_wrap(operator.div, 'div')
214216
__rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__')
215-
__idiv__ = __div__
217+
__idiv__ = disable
216218

217219
@property
218220
def values(self):
@@ -240,6 +242,14 @@ def __getitem__(self, key):
240242
data_slice = self.values[key]
241243
return self._constructor(data_slice)
242244

245+
def __getslice__(self, i, j):
246+
if i < 0:
247+
i -= len(self)
248+
if j < 0:
249+
j -= len(self)
250+
slobj = slice(i, j)
251+
return self.__getitem__(slobj)
252+
243253
def _get_val_at(self, loc):
244254
n = len(self)
245255
if loc < 0:

pandas/sparse/frame.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,13 @@ def _set_item(self, key, value):
218218
kind=self.default_kind)
219219
if hasattr(value, '__iter__'):
220220
if isinstance(value, Series):
221-
cleanSeries = value.reindex(self.index)
221+
clean_series = value.reindex(self.index)
222222
if not isinstance(value, SparseSeries):
223-
cleanSeries = sp_maker(cleanSeries)
223+
clean_series = sp_maker(clean_series)
224224
else:
225-
cleanSeries = sp_maker(value)
225+
clean_series = sp_maker(value)
226226

227-
self._series[key] = cleanSeries
227+
self._series[key] = clean_series
228228
# Scalar
229229
else:
230230
self._series[key] = sp_maker(value)

pandas/sparse/list.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,12 @@ class SparseList(object):
77
"""
88
Data structure for accumulating data to be converted into a
99
SparseArray. Has similar API to the standard Python list
10-
"""
1110
11+
Parameters
12+
----------
13+
data : scalar or array-like
14+
fill_value : scalar, default NaN
15+
"""
1216
def __init__(self, data=None, fill_value=np.nan):
1317
self.fill_value = fill_value
1418
self._chunks = []
@@ -104,6 +108,13 @@ def to_array(self):
104108
return self._chunks[0]
105109

106110
def append(self, value):
111+
"""
112+
Append element or array-like chunk of data to the SparseList
113+
114+
Parameters
115+
----------
116+
value: scalar or array-like
117+
"""
107118
if np.isscalar(value):
108119
value = [value]
109120

pandas/sparse/series.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,17 @@ class SparseSeries(SparseArray, Series):
7575
def __new__(cls, data, index=None, sparse_index=None, kind='block',
7676
fill_value=None, name=None, copy=False):
7777

78-
is_sparse_series = isinstance(data, SparseSeries)
78+
is_sparse_array = isinstance(data, SparseArray)
7979
if fill_value is None:
80-
if is_sparse_series:
80+
if is_sparse_array:
8181
fill_value = data.fill_value
8282
else:
8383
fill_value = nan
8484

85-
if is_sparse_series:
86-
if index is None:
85+
if is_sparse_array:
86+
if isinstance(data, SparseSeries) and index is None:
8787
index = data.index
88-
else:
88+
elif index is not None:
8989
assert(len(index) == len(data))
9090

9191
sparse_index = data.sp_index
@@ -236,19 +236,10 @@ def __repr__(self):
236236
__rfloordiv__ = _sparse_op_wrap(lambda x, y: y // x, 'floordiv')
237237
__rpow__ = _sparse_op_wrap(lambda x, y: y ** x, '__rpow__')
238238

239-
# Inplace operators
240-
__iadd__ = __add__
241-
__isub__ = __sub__
242-
__imul__ = __mul__
243-
__itruediv__ = __truediv__
244-
__ifloordiv__ = __floordiv__
245-
__ipow__ = __pow__
246-
247239
# Python 2 division operators
248240
if not py3compat.PY3:
249241
__div__ = _sparse_op_wrap(operator.div, 'div')
250242
__rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__')
251-
__idiv__ = __div__
252243

253244
def __getitem__(self, key):
254245
"""

pandas/sparse/tests/test_array.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,66 @@
1+
from numpy import nan, ndarray
12
import numpy as np
23

4+
import operator
35
import unittest
46

7+
from pandas.sparse.api import SparseArray
8+
from pandas.util.testing import assert_almost_equal
9+
10+
def assert_sp_array_equal(left, right):
11+
assert_almost_equal(left.sp_values, right.sp_values)
12+
assert(left.sp_index.equals(right.sp_index))
13+
if np.isnan(left.fill_value):
14+
assert(np.isnan(right.fill_value))
15+
else:
16+
assert(left.fill_value == right.fill_value)
17+
18+
519
class TestSparseArray(unittest.TestCase):
620

721
def setUp(self):
8-
pass
22+
self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
23+
self.arr = SparseArray(self.arr_data)
24+
self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
25+
26+
def test_values_asarray(self):
27+
assert_almost_equal(self.arr.values, self.arr_data)
28+
assert_almost_equal(self.arr.sp_values, np.asarray(self.arr))
29+
30+
def test_getslice(self):
31+
result = self.arr[:-3]
32+
exp = SparseArray(self.arr.values[:-3])
33+
assert_sp_array_equal(result, exp)
34+
35+
def test_binary_operators(self):
36+
data1 = np.random.randn(20)
37+
data2 = np.random.randn(20)
38+
data1[::2] = np.nan
39+
data2[::3] = np.nan
40+
41+
arr1 = SparseArray(data1)
42+
arr2 = SparseArray(data2)
43+
44+
def _check_op(op):
45+
res = op(arr1, arr2)
46+
exp = SparseArray(op(arr1.values, arr2.values))
47+
self.assert_(isinstance(res, SparseArray))
48+
assert_almost_equal(res.values, exp.values)
49+
50+
def _check_inplace_op(op):
51+
tmp = arr1.copy()
52+
self.assertRaises(NotImplementedError, op, tmp, arr2)
53+
54+
bin_ops = [operator.add, operator.sub, operator.mul, operator.truediv,
55+
operator.floordiv, operator.pow]
56+
for op in bin_ops:
57+
_check_op(op)
58+
59+
inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', 'ipow']
60+
for op in inplace_ops:
61+
_check_inplace_op(getattr(operator, op))
62+
63+
if __name__ == '__main__':
64+
import nose
65+
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
66+
exit=False)

pandas/sparse/tests/test_list.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def test_getitem(self):
7676
for i in range(len(arr)):
7777
assert_almost_equal(splist[i], arr[i])
7878

79+
7980
if __name__ == '__main__':
8081
import nose
8182
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],

pandas/sparse/tests/test_sparse.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,16 @@
2222
import pandas.sparse.frame as spf
2323

2424
from pandas._sparse import BlockIndex, IntIndex
25-
from pandas.core.sparse import (SparseSeries, SparseTimeSeries,
26-
SparseDataFrame, SparsePanel)
25+
from pandas.sparse.api import (SparseSeries, SparseTimeSeries,
26+
SparseDataFrame, SparsePanel,
27+
SparseArray)
2728

2829
import pandas.tests.test_frame as test_frame
2930
import pandas.tests.test_panel as test_panel
3031
import pandas.tests.test_series as test_series
3132

33+
from test_array import assert_sp_array_equal
34+
3235
def _test_data1():
3336
# nan-based
3437
arr = np.arange(20, dtype=float)
@@ -63,14 +66,6 @@ def assert_sp_series_equal(a, b):
6366
assert(a.index.equals(b.index))
6467
assert_sp_array_equal(a, b)
6568

66-
def assert_sp_array_equal(left, right):
67-
assert_almost_equal(left.sp_values, right.sp_values)
68-
assert(left.sp_index.equals(right.sp_index))
69-
if np.isnan(left.fill_value):
70-
assert(np.isnan(right.fill_value))
71-
else:
72-
assert(left.fill_value == right.fill_value)
73-
7469

7570
def assert_sp_frame_equal(left, right, exact_indices=True):
7671
"""
@@ -332,7 +327,7 @@ def test_set_value(self):
332327
self.assert_(res.index[-1] == 'foobar')
333328
self.assertEqual(res['foobar'], 0)
334329

335-
def test_getitem_fancy_index(self):
330+
def test_getitem_slice(self):
336331
idx = self.bseries.index
337332
res = self.bseries[::2]
338333
self.assert_(isinstance(res, SparseSeries))
@@ -345,6 +340,10 @@ def test_getitem_fancy_index(self):
345340
res = self.bseries[5:]
346341
assert_sp_series_equal(res, self.bseries.reindex(idx[5:]))
347342

343+
# negative indices
344+
res = self.bseries[:-3]
345+
assert_sp_series_equal(res, self.bseries.reindex(idx[:-3]))
346+
348347
def test_take(self):
349348
def _compare_with_dense(sp):
350349
dense = sp.to_dense()
@@ -368,9 +367,6 @@ def _compare(idx):
368367
sp = SparseSeries(np.ones(10.) * nan)
369368
assert_almost_equal(sp.take([0, 1, 2, 3, 4]), np.repeat(nan, 5))
370369

371-
def test_getslice(self):
372-
pass
373-
374370
def test_setitem(self):
375371
self.assertRaises(Exception, self.bseries.__setitem__, 5, 7.)
376372
self.assertRaises(Exception, self.iseries.__setitem__, 5, 7.)
@@ -432,6 +428,15 @@ def test_operators_corner2(self):
432428
result = val - self.zbseries
433429
assert_sp_series_equal(result, 3 - self.zbseries)
434430

431+
432+
def test_binary_operators(self):
433+
def _check_inplace_op(op):
434+
tmp = self.bseries.copy()
435+
self.assertRaises(NotImplementedError, op, tmp, self.bseries)
436+
inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv', 'ipow']
437+
for op in inplace_ops:
438+
_check_inplace_op(getattr(operator, op))
439+
435440
def test_reindex(self):
436441
def _compare_with_series(sps, new_index):
437442
spsre = sps.reindex(new_index)
@@ -936,6 +941,13 @@ def test_setitem_corner(self):
936941
self.frame['a'] = self.frame['B']
937942
assert_sp_series_equal(self.frame['a'], self.frame['B'])
938943

944+
def test_setitem_array(self):
945+
arr = self.frame['B'].view(SparseArray)
946+
947+
self.frame['E'] = arr
948+
assert_sp_series_equal(self.frame['E'], self.frame['B'])
949+
self.assertRaises(Exception, self.frame.__setitem__, 'F', arr[:-1])
950+
939951
def test_delitem(self):
940952
A = self.frame['A']
941953
C = self.frame['C']

0 commit comments

Comments
 (0)