TST: finish unit test coverage for sparse arrays, release notes GH #436

wesm · wesm · commit f80bfc169072 · 2011-12-11T18:14:16.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -39,6 +39,8 @@ pandas 0.6.1
     #438). Add similar methods to sparse data structures for compatibility
   - Add Qt table widget to sandbox (PR #435)
   - DataFrame.align can accept Series arguments, add axis keyword (GH #461)
+  - Implement new SparseList and SparseArray data structures. SparseSeries now
+    derives from SparseArray (GH #463)
 
 **Improvements to existing features**
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -372,9 +372,9 @@ def _multilevel_index(self, key):
 
     def __getslice__(self, i, j):
         if i < 0:
-            i -= len(self)
+            i = 0
         if j < 0:
-            j -= len(self)
+            j = 0
         slobj = slice(i, j)
         return self.__getitem__(slobj)
 
diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
@@ -244,9 +244,9 @@ def __getitem__(self, key):
 
     def __getslice__(self, i, j):
         if i < 0:
-            i -= len(self)
+            i = 0
         if j < 0:
-            j -= len(self)
+            j = 0
         slobj = slice(i, j)
         return self.__getitem__(slobj)
 
@@ -304,8 +304,9 @@ def astype(self, dtype=None):
         """
 
         """
+        dtype = np.dtype(dtype)
         if dtype is not None and dtype not in (np.float_, float):
-            raise Exception('Can only support floating point data')
+            raise Exception('Can only support floating point data for now')
         return self.copy()
 
     def copy(self, deep=True):
@@ -373,10 +374,6 @@ def cumsum(self, axis=0, dtype=None, out=None):
         -------
         cumsum : Series
         """
-
-
-
-
         if com.notnull(self.fill_value):
             return self.to_dense().cumsum()
         # TODO: what if sp_values contains NaN??
@@ -402,14 +399,6 @@ def mean(self, axis=None, dtype=None, out=None):
             nsparse = self.sp_index.npoints
             return (sp_sum + self.fill_value * nsparse) / (ct + nsparse)
 
-    def valid(self):
-        """
-        Analogous to Series.valid
-        """
-        # TODO: make more efficient
-        dense_valid = self.to_dense().valid()
-        return dense_valid.to_sparse(fill_value=self.fill_value)
-
 
 
 def make_sparse(arr, kind='block', fill_value=nan):
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
@@ -295,9 +295,6 @@ def __getitem__(self, item):
             else: # pragma: no cover
                 raise
 
-    def _get_item_cache(self, key):
-        return self[key]
-
     def get_value(self, index, col):
         s = self._series[col]
         return s.get_value(index)
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
@@ -410,12 +410,6 @@ def sparse_reindex(self, new_index):
                             sparse_index=new_index,
                             fill_value=self.fill_value)
 
-    @property
-    def _valid_sp_values(self):
-        sp_vals = self.sp_values
-        mask = np.isfinite(sp_vals)
-        return sp_vals[mask]
-
     def cumsum(self, axis=0, dtype=None, out=None):
         """
         Cumulative sum of values. Preserves locations of NaN values
@@ -424,13 +418,18 @@ def cumsum(self, axis=0, dtype=None, out=None):
 
         Returns
         -------
-        cumsum : Series
+        cumsum : Series or SparseSeries
         """
-        if not np.isnan(self.fill_value):
-            return self.to_dense().cumsum()
-        return SparseSeries(self.sp_values.cumsum(), index=self.index,
-                            sparse_index=self.sp_index, name=self.name,
-                            fill_value=self.fill_value)
+        result = SparseArray.cumsum(self)
+        if isinstance(result, SparseArray):
+            result = self._attach_meta(result)
+        return result
+
+    def _attach_meta(self, sparse_arr):
+        sparse_series = sparse_arr.view(SparseSeries)
+        sparse_series.index = self.index
+        sparse_series.name = self.name
+        return sparse_series
 
     def valid(self):
         """
diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py
@@ -2,6 +2,7 @@
 import numpy as np
 
 import operator
+import pickle
 import unittest
 
 from pandas.sparse.api import SparseArray
@@ -23,18 +24,50 @@ def setUp(self):
         self.arr = SparseArray(self.arr_data)
         self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
 
-    def test_constructor(self):
-        pass
+    def test_constructor_from_sparse(self):
+        res = SparseArray(self.zarr)
+        self.assertEquals(res.fill_value, 0)
+        assert_almost_equal(res.sp_values, self.zarr.sp_values)
+
+    def test_constructor_copy(self):
+        cp = SparseArray(self.arr, copy=True)
+        cp.sp_values[:3] = 0
+        self.assert_(not (self.arr.sp_values[:3] == 0).any())
+
+        not_copy = SparseArray(self.arr)
+        not_copy.sp_values[:3] = 0
+        self.assert_((self.arr.sp_values[:3] == 0).all())
+
+    def test_astype(self):
+        res = self.arr.astype('f8')
+        res.sp_values[:3] = 27
+        self.assert_(not (self.arr.sp_values[:3] == 27).any())
+
+        self.assertRaises(Exception, self.arr.astype, 'i8')
 
     def test_values_asarray(self):
         assert_almost_equal(self.arr.values, self.arr_data)
+        assert_almost_equal(self.arr.to_dense(), self.arr_data)
         assert_almost_equal(self.arr.sp_values, np.asarray(self.arr))
 
     def test_getslice(self):
         result = self.arr[:-3]
         exp = SparseArray(self.arr.values[:-3])
         assert_sp_array_equal(result, exp)
 
+        result = self.arr[-4:]
+        exp = SparseArray(self.arr.values[-4:])
+        assert_sp_array_equal(result, exp)
+
+        # two corner cases from Series
+        result = self.arr[-12:]
+        exp = SparseArray(self.arr)
+        assert_sp_array_equal(result, exp)
+
+        result = self.arr[:-12]
+        exp = SparseArray(self.arr.values[:0])
+        assert_sp_array_equal(result, exp)
+
     def test_binary_operators(self):
         data1 = np.random.randn(20)
         data2 = np.random.randn(20)
@@ -85,6 +118,15 @@ def _check_inplace_op(op):
         for op in inplace_ops:
             _check_inplace_op(getattr(operator, op))
 
+    def test_pickle(self):
+        def _check_roundtrip(obj):
+            pickled = pickle.dumps(obj)
+            unpickled = pickle.loads(pickled)
+            assert_sp_array_equal(unpickled, obj)
+
+        _check_roundtrip(self.arr)
+        _check_roundtrip(self.zarr)
+
 if __name__ == '__main__':
     import nose
     nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py
@@ -9,12 +9,31 @@
 from test_sparse import assert_sp_array_equal
 
 
+def assert_sp_list_equal(left, right):
+    assert_sp_array_equal(left.to_array(), right.to_array())
+
 class TestSparseList(unittest.TestCase):
 
     def setUp(self):
         self.na_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
         self.zero_data = np.array([0, 0, 1, 2, 3, 0, 4, 5, 0, 6])
 
+    def test_constructor(self):
+        lst1 = SparseList(self.na_data[:5])
+        exp = SparseList()
+        exp.append(self.na_data[:5])
+        assert_sp_list_equal(lst1, exp)
+
+    def test_len(self):
+        arr = self.na_data
+        splist = SparseList()
+        splist.append(arr[:5])
+        self.assertEquals(len(splist), 5)
+        splist.append(arr[5])
+        self.assertEquals(len(splist), 6)
+        splist.append(arr[6:])
+        self.assertEquals(len(splist), 10)
+
     def test_append_na(self):
         arr = self.na_data
         splist = SparseList()
@@ -75,6 +94,7 @@ def test_getitem(self):
 
         for i in range(len(arr)):
             assert_almost_equal(splist[i], arr[i])
+            assert_almost_equal(splist[-i], arr[-i])
 
 
 if __name__ == '__main__':