BUG: SparseSeries.shift may raise NameError or TypeError

sinhrks · jreback · commit 5d8a93517e88 · 2016-04-19T21:06:35.000-04:00
Author: sinhrks <sinhrks@gmail.com> Closes #12908 from sinhrks/sparse_shift and squashes the following commits: 5a0adfa [sinhrks] BUG: SparseSeries.shift may raise NameError or TypeError
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -109,7 +109,7 @@ These changes conform sparse handling to return the correct types and work to ma
    s.take(0)
    s.take([1, 2, 3])
 
-- Bug in ``SparseSeries.__getitem__`` with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`)
+- Bug in ``SparseSeries[]`` indexing with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`)
 - Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`)
 - Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`)
 - Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`)
@@ -119,11 +119,13 @@ These changes conform sparse handling to return the correct types and work to ma
 - Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`)
 - Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
 - Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
+- Bug in ``SparseSeries`` and ``SparseArray`` where the ``dtype`` may differ from that of their dense values (:issue:`12908`)
 - Bug in ``SparseSeries.reindex`` incorrectly handle ``fill_value`` (:issue:`12797`)
 - Bug in ``SparseArray.to_frame()`` results in ``DataFrame``, rather than ``SparseDataFrame`` (:issue:`9850`)
 - Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`)
 - Bug in ``SparseArray.to_dense()`` incorrectly handle ``fill_value`` (:issue:`12797`)
 - Bug in ``pd.concat()`` of ``SparseSeries`` results in dense (:issue:`10536`)
+- Bug in ``SparseSeries.shift()`` may raise ``NameError`` or ``TypeError`` (:issue:`12908`)
 
 .. _whatsnew_0181.api:
 
diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
@@ -165,6 +165,12 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
 
     @classmethod
     def _simple_new(cls, data, sp_index, fill_value):
+        if (com.is_integer_dtype(data) and com.is_float(fill_value) and
+           sp_index.ngaps > 0):
+            # if float fill_value is being included in dense repr,
+            # convert values to float
+            data = data.astype(float)
+
         result = data.view(cls)
 
         if not isinstance(sp_index, SparseIndex):
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
@@ -20,14 +20,20 @@
 import pandas.core.ops as ops
 import pandas.index as _index
 import pandas.lib as lib
+from pandas.util.decorators import Appender
 
-from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray)
+from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray,
+                                 _make_index)
 from pandas._sparse import BlockIndex, IntIndex
 import pandas._sparse as splib
 
 from pandas.sparse.scipy_sparse import (_sparse_series_to_coo,
                                         _coo_to_sparse_series)
 
+
+_shared_doc_kwargs = dict(klass='SparseSeries',
+                          axes_single_arg="{0, 'index'}")
+
 # -----------------------------------------------------------------------------
 # Wrapper function for Series arithmetic methods
 
@@ -633,20 +639,17 @@ def dropna(self, axis=0, inplace=False, **kwargs):
             dense_valid = dense_valid[dense_valid != self.fill_value]
             return dense_valid.to_sparse(fill_value=self.fill_value)
 
-    def shift(self, periods, freq=None):
-        """
-        Analogous to Series.shift
-        """
+    @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
+    def shift(self, periods, freq=None, axis=0):
+        if periods == 0:
+            return self.copy()
 
         # no special handling of fill values yet
         if not isnull(self.fill_value):
-            # TODO: kwds is not defined...should this work?
-            dense_shifted = self.to_dense().shift(periods, freq=freq, **kwds)  # noqa
-            return dense_shifted.to_sparse(fill_value=self.fill_value,
-                                           kind=self.kind)
-
-        if periods == 0:
-            return self.copy()
+            shifted = self.to_dense().shift(periods, freq=freq,
+                                            axis=axis)
+            return shifted.to_sparse(fill_value=self.fill_value,
+                                     kind=self.kind)
 
         if freq is not None:
             return self._constructor(
@@ -659,14 +662,11 @@ def shift(self, periods, freq=None):
         start, end = new_indices.searchsorted([0, int_index.length])
 
         new_indices = new_indices[start:end]
+        new_sp_index = _make_index(len(self), new_indices, self.sp_index)
 
-        new_sp_index = IntIndex(len(self), new_indices)
-        if isinstance(self.sp_index, BlockIndex):
-            new_sp_index = new_sp_index.to_block_index()
-
-        return self._constructor(self.sp_values[start:end].copy(),
-                                 index=self.index, sparse_index=new_sp_index,
-                                 fill_value=self.fill_value).__finalize__(self)
+        arr = self.values._simple_new(self.sp_values[start:end].copy(),
+                                      new_sp_index, fill_value=np.nan)
+        return self._constructor(arr, index=self.index).__finalize__(self)
 
     def combine_first(self, other):
         """
diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py
@@ -8,148 +8,11 @@
 
 from pandas import _np_version_under1p8
 from pandas.sparse.api import SparseArray
-import pandas.sparse.array as sparray
+from pandas._sparse import IntIndex
 from pandas.util.testing import assert_almost_equal, assertRaisesRegexp
 import pandas.util.testing as tm
 
 
-class TestSparseArrayIndex(tm.TestCase):
-
-    _multiprocess_can_split_ = True
-
-    def test_int_internal(self):
-        idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
-                                  kind='integer')
-        self.assertIsInstance(idx, sparray.IntIndex)
-        self.assertEqual(idx.npoints, 2)
-        tm.assert_numpy_array_equal(idx.indices,
-                                    np.array([2, 3], dtype=np.int32))
-
-        idx = sparray._make_index(4, np.array([], dtype=np.int32),
-                                  kind='integer')
-        self.assertIsInstance(idx, sparray.IntIndex)
-        self.assertEqual(idx.npoints, 0)
-        tm.assert_numpy_array_equal(idx.indices,
-                                    np.array([], dtype=np.int32))
-
-        idx = sparray._make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
-                                  kind='integer')
-        self.assertIsInstance(idx, sparray.IntIndex)
-        self.assertEqual(idx.npoints, 4)
-        tm.assert_numpy_array_equal(idx.indices,
-                                    np.array([0, 1, 2, 3], dtype=np.int32))
-
-    def test_block_internal(self):
-        idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
-                                  kind='block')
-        self.assertIsInstance(idx, sparray.BlockIndex)
-        self.assertEqual(idx.npoints, 2)
-        tm.assert_numpy_array_equal(idx.blocs,
-                                    np.array([2], dtype=np.int32))
-        tm.assert_numpy_array_equal(idx.blengths,
-                                    np.array([2], dtype=np.int32))
-
-        idx = sparray._make_index(4, np.array([], dtype=np.int32),
-                                  kind='block')
-        self.assertIsInstance(idx, sparray.BlockIndex)
-        self.assertEqual(idx.npoints, 0)
-        tm.assert_numpy_array_equal(idx.blocs,
-                                    np.array([], dtype=np.int32))
-        tm.assert_numpy_array_equal(idx.blengths,
-                                    np.array([], dtype=np.int32))
-
-        idx = sparray._make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
-                                  kind='block')
-        self.assertIsInstance(idx, sparray.BlockIndex)
-        self.assertEqual(idx.npoints, 4)
-        tm.assert_numpy_array_equal(idx.blocs,
-                                    np.array([0], dtype=np.int32))
-        tm.assert_numpy_array_equal(idx.blengths,
-                                    np.array([4], dtype=np.int32))
-
-        idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32),
-                                  kind='block')
-        self.assertIsInstance(idx, sparray.BlockIndex)
-        self.assertEqual(idx.npoints, 3)
-        tm.assert_numpy_array_equal(idx.blocs,
-                                    np.array([0, 2], dtype=np.int32))
-        tm.assert_numpy_array_equal(idx.blengths,
-                                    np.array([1, 2], dtype=np.int32))
-
-    def test_lookup(self):
-        for kind in ['integer', 'block']:
-            idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
-                                      kind=kind)
-            self.assertEqual(idx.lookup(-1), -1)
-            self.assertEqual(idx.lookup(0), -1)
-            self.assertEqual(idx.lookup(1), -1)
-            self.assertEqual(idx.lookup(2), 0)
-            self.assertEqual(idx.lookup(3), 1)
-            self.assertEqual(idx.lookup(4), -1)
-
-            idx = sparray._make_index(4, np.array([], dtype=np.int32),
-                                      kind=kind)
-            for i in range(-1, 5):
-                self.assertEqual(idx.lookup(i), -1)
-
-            idx = sparray._make_index(4, np.array([0, 1, 2, 3],
-                                                  dtype=np.int32), kind=kind)
-            self.assertEqual(idx.lookup(-1), -1)
-            self.assertEqual(idx.lookup(0), 0)
-            self.assertEqual(idx.lookup(1), 1)
-            self.assertEqual(idx.lookup(2), 2)
-            self.assertEqual(idx.lookup(3), 3)
-            self.assertEqual(idx.lookup(4), -1)
-
-            idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32),
-                                      kind=kind)
-            self.assertEqual(idx.lookup(-1), -1)
-            self.assertEqual(idx.lookup(0), 0)
-            self.assertEqual(idx.lookup(1), -1)
-            self.assertEqual(idx.lookup(2), 1)
-            self.assertEqual(idx.lookup(3), 2)
-            self.assertEqual(idx.lookup(4), -1)
-
-    def test_lookup_array(self):
-        for kind in ['integer', 'block']:
-            idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
-                                      kind=kind)
-
-            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
-            exp = np.array([-1, -1, 0], dtype=np.int32)
-            self.assert_numpy_array_equal(res, exp)
-
-            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
-            exp = np.array([-1, 0, -1, 1], dtype=np.int32)
-            self.assert_numpy_array_equal(res, exp)
-
-            idx = sparray._make_index(4, np.array([], dtype=np.int32),
-                                      kind=kind)
-            res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
-            exp = np.array([-1, -1, -1, -1], dtype=np.int32)
-
-            idx = sparray._make_index(4, np.array([0, 1, 2, 3],
-                                                  dtype=np.int32),
-                                      kind=kind)
-            res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
-            exp = np.array([-1, 0, 2], dtype=np.int32)
-            self.assert_numpy_array_equal(res, exp)
-
-            res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
-            exp = np.array([-1, 2, 1, 3], dtype=np.int32)
-            self.assert_numpy_array_equal(res, exp)
-
-            idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32),
-                                      kind=kind)
-            res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
-            exp = np.array([1, -1, 2, 0], dtype=np.int32)
-            self.assert_numpy_array_equal(res, exp)
-
-            res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
-            exp = np.array([-1, -1, 1, -1], dtype=np.int32)
-            self.assert_numpy_array_equal(res, exp)
-
-
 class TestSparseArray(tm.TestCase):
 
     _multiprocess_can_split_ = True
@@ -159,6 +22,67 @@ def setUp(self):
         self.arr = SparseArray(self.arr_data)
         self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
 
+    def test_constructor_dtype(self):
+        arr = SparseArray([np.nan, 1, 2, np.nan])
+        self.assertEqual(arr.dtype, np.float64)
+        self.assertTrue(np.isnan(arr.fill_value))
+
+        arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
+        self.assertEqual(arr.dtype, np.float64)
+        self.assertEqual(arr.fill_value, 0)
+
+        arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertTrue(np.isnan(arr.fill_value))
+
+        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertEqual(arr.fill_value, 0)
+
+        arr = SparseArray([0, 1, 2, 4], dtype=None)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertTrue(np.isnan(arr.fill_value))
+
+        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertEqual(arr.fill_value, 0)
+
+    def test_constructor_spindex_dtype(self):
+        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
+        tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan]))
+        self.assertEqual(arr.dtype, np.float64)
+        self.assertTrue(np.isnan(arr.fill_value))
+
+        arr = SparseArray(data=[0, 1, 2, 3],
+                          sparse_index=IntIndex(4, [0, 1, 2, 3]),
+                          dtype=np.int64)
+        exp = SparseArray([0, 1, 2, 3], dtype=np.int64)
+        tm.assert_sp_array_equal(arr, exp)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertTrue(np.isnan(arr.fill_value))
+
+        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
+                          fill_value=0, dtype=np.int64)
+        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
+        tm.assert_sp_array_equal(arr, exp)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertEqual(arr.fill_value, 0)
+
+        arr = SparseArray(data=[0, 1, 2, 3],
+                          sparse_index=IntIndex(4, [0, 1, 2, 3]),
+                          dtype=None)
+        exp = SparseArray([0, 1, 2, 3], dtype=None)
+        tm.assert_sp_array_equal(arr, exp)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertTrue(np.isnan(arr.fill_value))
+
+        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
+                          fill_value=0, dtype=None)
+        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
+        tm.assert_sp_array_equal(arr, exp)
+        self.assertEqual(arr.dtype, np.int64)
+        self.assertEqual(arr.fill_value, 0)
+
     def test_get_item(self):
 
         self.assertTrue(np.isnan(self.arr[1]))
diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py
diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py