Skip to content

Commit f80bfc1

Browse files
committed
TST: finish unit test coverage for sparse arrays, release notes GH #436
1 parent 3618568 commit f80bfc1

File tree

7 files changed, with 83 additions and 34 deletions.

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@ pandas 0.6.1
3939
#438). Add similar methods to sparse data structures for compatibility
4040
- Add Qt table widget to sandbox (PR #435)
4141
- DataFrame.align can accept Series arguments, add axis keyword (GH #461)
42+
- Implement new SparseList and SparseArray data structures. SparseSeries now
43+
derives from SparseArray (GH #463)
4244

4345
**Improvements to existing features**
4446

pandas/core/series.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -372,9 +372,9 @@ def _multilevel_index(self, key):
372372

373373
def __getslice__(self, i, j):
374374
if i < 0:
375-
i -= len(self)
375+
i = 0
376376
if j < 0:
377-
j -= len(self)
377+
j = 0
378378
slobj = slice(i, j)
379379
return self.__getitem__(slobj)
380380

pandas/sparse/array.py

Lines changed: 4 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -244,9 +244,9 @@ def __getitem__(self, key):
244244

245245
def __getslice__(self, i, j):
246246
if i < 0:
247-
i -= len(self)
247+
i = 0
248248
if j < 0:
249-
j -= len(self)
249+
j = 0
250250
slobj = slice(i, j)
251251
return self.__getitem__(slobj)
252252

@@ -304,8 +304,9 @@ def astype(self, dtype=None):
304304
"""
305305
306306
"""
307+
dtype = np.dtype(dtype)
307308
if dtype is not None and dtype not in (np.float_, float):
308-
raise Exception('Can only support floating point data')
309+
raise Exception('Can only support floating point data for now')
309310
return self.copy()
310311

311312
def copy(self, deep=True):
@@ -373,10 +374,6 @@ def cumsum(self, axis=0, dtype=None, out=None):
373374
-------
374375
cumsum : Series
375376
"""
376-
377-
378-
379-
380377
if com.notnull(self.fill_value):
381378
return self.to_dense().cumsum()
382379
# TODO: what if sp_values contains NaN??
@@ -402,14 +399,6 @@ def mean(self, axis=None, dtype=None, out=None):
402399
nsparse = self.sp_index.npoints
403400
return (sp_sum + self.fill_value * nsparse) / (ct + nsparse)
404401

405-
def valid(self):
406-
"""
407-
Analogous to Series.valid
408-
"""
409-
# TODO: make more efficient
410-
dense_valid = self.to_dense().valid()
411-
return dense_valid.to_sparse(fill_value=self.fill_value)
412-
413402

414403

415404
def make_sparse(arr, kind='block', fill_value=nan):

pandas/sparse/frame.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -295,9 +295,6 @@ def __getitem__(self, item):
295295
else: # pragma: no cover
296296
raise
297297

298-
def _get_item_cache(self, key):
299-
return self[key]
300-
301298
def get_value(self, index, col):
302299
s = self._series[col]
303300
return s.get_value(index)

pandas/sparse/series.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -410,12 +410,6 @@ def sparse_reindex(self, new_index):
410410
sparse_index=new_index,
411411
fill_value=self.fill_value)
412412

413-
@property
414-
def _valid_sp_values(self):
415-
sp_vals = self.sp_values
416-
mask = np.isfinite(sp_vals)
417-
return sp_vals[mask]
418-
419413
def cumsum(self, axis=0, dtype=None, out=None):
420414
"""
421415
Cumulative sum of values. Preserves locations of NaN values
@@ -424,13 +418,18 @@ def cumsum(self, axis=0, dtype=None, out=None):
424418
425419
Returns
426420
-------
427-
cumsum : Series
421+
cumsum : Series or SparseSeries
428422
"""
429-
if not np.isnan(self.fill_value):
430-
return self.to_dense().cumsum()
431-
return SparseSeries(self.sp_values.cumsum(), index=self.index,
432-
sparse_index=self.sp_index, name=self.name,
433-
fill_value=self.fill_value)
423+
result = SparseArray.cumsum(self)
424+
if isinstance(result, SparseArray):
425+
result = self._attach_meta(result)
426+
return result
427+
428+
def _attach_meta(self, sparse_arr):
429+
sparse_series = sparse_arr.view(SparseSeries)
430+
sparse_series.index = self.index
431+
sparse_series.name = self.name
432+
return sparse_series
434433

435434
def valid(self):
436435
"""

pandas/sparse/tests/test_array.py

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33

44
import operator
5+
import pickle
56
import unittest
67

78
from pandas.sparse.api import SparseArray
@@ -23,18 +24,50 @@ def setUp(self):
2324
self.arr = SparseArray(self.arr_data)
2425
self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
2526

26-
def test_constructor(self):
27-
pass
27+
def test_constructor_from_sparse(self):
28+
res = SparseArray(self.zarr)
29+
self.assertEquals(res.fill_value, 0)
30+
assert_almost_equal(res.sp_values, self.zarr.sp_values)
31+
32+
def test_constructor_copy(self):
33+
cp = SparseArray(self.arr, copy=True)
34+
cp.sp_values[:3] = 0
35+
self.assert_(not (self.arr.sp_values[:3] == 0).any())
36+
37+
not_copy = SparseArray(self.arr)
38+
not_copy.sp_values[:3] = 0
39+
self.assert_((self.arr.sp_values[:3] == 0).all())
40+
41+
def test_astype(self):
42+
res = self.arr.astype('f8')
43+
res.sp_values[:3] = 27
44+
self.assert_(not (self.arr.sp_values[:3] == 27).any())
45+
46+
self.assertRaises(Exception, self.arr.astype, 'i8')
2847

2948
def test_values_asarray(self):
3049
assert_almost_equal(self.arr.values, self.arr_data)
50+
assert_almost_equal(self.arr.to_dense(), self.arr_data)
3151
assert_almost_equal(self.arr.sp_values, np.asarray(self.arr))
3252

3353
def test_getslice(self):
3454
result = self.arr[:-3]
3555
exp = SparseArray(self.arr.values[:-3])
3656
assert_sp_array_equal(result, exp)
3757

58+
result = self.arr[-4:]
59+
exp = SparseArray(self.arr.values[-4:])
60+
assert_sp_array_equal(result, exp)
61+
62+
# two corner cases from Series
63+
result = self.arr[-12:]
64+
exp = SparseArray(self.arr)
65+
assert_sp_array_equal(result, exp)
66+
67+
result = self.arr[:-12]
68+
exp = SparseArray(self.arr.values[:0])
69+
assert_sp_array_equal(result, exp)
70+
3871
def test_binary_operators(self):
3972
data1 = np.random.randn(20)
4073
data2 = np.random.randn(20)
@@ -85,6 +118,15 @@ def _check_inplace_op(op):
85118
for op in inplace_ops:
86119
_check_inplace_op(getattr(operator, op))
87120

121+
def test_pickle(self):
122+
def _check_roundtrip(obj):
123+
pickled = pickle.dumps(obj)
124+
unpickled = pickle.loads(pickled)
125+
assert_sp_array_equal(unpickled, obj)
126+
127+
_check_roundtrip(self.arr)
128+
_check_roundtrip(self.zarr)
129+
88130
if __name__ == '__main__':
89131
import nose
90132
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],

pandas/sparse/tests/test_list.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,31 @@
99
from test_sparse import assert_sp_array_equal
1010

1111

12+
def assert_sp_list_equal(left, right):
13+
assert_sp_array_equal(left.to_array(), right.to_array())
14+
1215
class TestSparseList(unittest.TestCase):
1316

1417
def setUp(self):
1518
self.na_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
1619
self.zero_data = np.array([0, 0, 1, 2, 3, 0, 4, 5, 0, 6])
1720

21+
def test_constructor(self):
22+
lst1 = SparseList(self.na_data[:5])
23+
exp = SparseList()
24+
exp.append(self.na_data[:5])
25+
assert_sp_list_equal(lst1, exp)
26+
27+
def test_len(self):
28+
arr = self.na_data
29+
splist = SparseList()
30+
splist.append(arr[:5])
31+
self.assertEquals(len(splist), 5)
32+
splist.append(arr[5])
33+
self.assertEquals(len(splist), 6)
34+
splist.append(arr[6:])
35+
self.assertEquals(len(splist), 10)
36+
1837
def test_append_na(self):
1938
arr = self.na_data
2039
splist = SparseList()
@@ -75,6 +94,7 @@ def test_getitem(self):
7594

7695
for i in range(len(arr)):
7796
assert_almost_equal(splist[i], arr[i])
97+
assert_almost_equal(splist[-i], arr[-i])
7898

7999

80100
if __name__ == '__main__':

0 commit comments

Comments
 (0)