From 5a0adfa416fa1af5aba6f1a9d6c67a609b052bb1 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 16 Apr 2016 19:18:05 +0900 Subject: [PATCH] BUG: SparseSeries.shift may raise NameError or TypeError --- doc/source/whatsnew/v0.18.1.txt | 4 +- pandas/sparse/array.py | 6 + pandas/sparse/series.py | 38 ++--- pandas/sparse/tests/test_array.py | 200 ++++++++------------------ pandas/sparse/tests/test_libsparse.py | 175 +++++++++++++++++++--- pandas/sparse/tests/test_series.py | 89 ++++++++++++ 6 files changed, 333 insertions(+), 179 deletions(-) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index 073b859f4c9a7..d47fbed47f554 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -109,7 +109,7 @@ These changes conform sparse handling to return the correct types and work to ma s.take(0) s.take([1, 2, 3]) -- Bug in ``SparseSeries.__getitem__`` with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`) +- Bug in ``SparseSeries[]`` indexing with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`) - Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`) - Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`) - Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`) @@ -119,11 +119,13 @@ These changes conform sparse handling to return the correct types and work to ma - Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`) - Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`) - Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`) +- Bug in ``SparseSeries`` and ``SparseArray`` may have different ``dtype`` from its dense values (:issue:`12908`) - Bug in ``SparseSeries.reindex`` incorrectly handle ``fill_value`` (:issue:`12797`) - Bug in ``SparseArray.to_frame()`` 
results in ``DataFrame``, rather than ``SparseDataFrame`` (:issue:`9850`) - Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`) - Bug in ``SparseArray.to_dense()`` incorrectly handle ``fill_value`` (:issue:`12797`) - Bug in ``pd.concat()`` of ``SparseSeries`` results in dense (:issue:`10536`) +- Bug in ``SparseSeries.shift()`` may raise ``NameError`` or ``TypeError`` (:issue:`12908`) .. _whatsnew_0181.api: diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 3d86e1489fede..a96663d757e74 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -165,6 +165,12 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer', @classmethod def _simple_new(cls, data, sp_index, fill_value): + if (com.is_integer_dtype(data) and com.is_float(fill_value) and + sp_index.ngaps > 0): + # if float fill_value is being included in dense repr, + # convert values to float + data = data.astype(float) + result = data.view(cls) if not isinstance(sp_index, SparseIndex): diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 4cfa39c4571bd..1fe58922e85a5 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -20,14 +20,20 @@ import pandas.core.ops as ops import pandas.index as _index import pandas.lib as lib +from pandas.util.decorators import Appender -from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray) +from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray, + _make_index) from pandas._sparse import BlockIndex, IntIndex import pandas._sparse as splib from pandas.sparse.scipy_sparse import (_sparse_series_to_coo, _coo_to_sparse_series) + +_shared_doc_kwargs = dict(klass='SparseSeries', + axes_single_arg="{0, 'index'}") + # ----------------------------------------------------------------------------- # Wrapper function for Series arithmetic methods @@ -633,20 +639,17 @@ def dropna(self, axis=0, inplace=False, **kwargs): dense_valid = dense_valid[dense_valid 
!= self.fill_value] return dense_valid.to_sparse(fill_value=self.fill_value) - def shift(self, periods, freq=None): - """ - Analogous to Series.shift - """ + @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs) + def shift(self, periods, freq=None, axis=0): + if periods == 0: + return self.copy() # no special handling of fill values yet if not isnull(self.fill_value): - # TODO: kwds is not defined...should this work? - dense_shifted = self.to_dense().shift(periods, freq=freq, **kwds) # noqa - return dense_shifted.to_sparse(fill_value=self.fill_value, - kind=self.kind) - - if periods == 0: - return self.copy() + shifted = self.to_dense().shift(periods, freq=freq, + axis=axis) + return shifted.to_sparse(fill_value=self.fill_value, + kind=self.kind) if freq is not None: return self._constructor( @@ -659,14 +662,11 @@ def shift(self, periods, freq=None): start, end = new_indices.searchsorted([0, int_index.length]) new_indices = new_indices[start:end] + new_sp_index = _make_index(len(self), new_indices, self.sp_index) - new_sp_index = IntIndex(len(self), new_indices) - if isinstance(self.sp_index, BlockIndex): - new_sp_index = new_sp_index.to_block_index() - - return self._constructor(self.sp_values[start:end].copy(), - index=self.index, sparse_index=new_sp_index, - fill_value=self.fill_value).__finalize__(self) + arr = self.values._simple_new(self.sp_values[start:end].copy(), + new_sp_index, fill_value=np.nan) + return self._constructor(arr, index=self.index).__finalize__(self) def combine_first(self, other): """ diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 064c4be15dfb0..7f76c079e17b3 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -8,148 +8,11 @@ from pandas import _np_version_under1p8 from pandas.sparse.api import SparseArray -import pandas.sparse.array as sparray +from pandas._sparse import IntIndex from pandas.util.testing import assert_almost_equal, assertRaisesRegexp 
import pandas.util.testing as tm -class TestSparseArrayIndex(tm.TestCase): - - _multiprocess_can_split_ = True - - def test_int_internal(self): - idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32), - kind='integer') - self.assertIsInstance(idx, sparray.IntIndex) - self.assertEqual(idx.npoints, 2) - tm.assert_numpy_array_equal(idx.indices, - np.array([2, 3], dtype=np.int32)) - - idx = sparray._make_index(4, np.array([], dtype=np.int32), - kind='integer') - self.assertIsInstance(idx, sparray.IntIndex) - self.assertEqual(idx.npoints, 0) - tm.assert_numpy_array_equal(idx.indices, - np.array([], dtype=np.int32)) - - idx = sparray._make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind='integer') - self.assertIsInstance(idx, sparray.IntIndex) - self.assertEqual(idx.npoints, 4) - tm.assert_numpy_array_equal(idx.indices, - np.array([0, 1, 2, 3], dtype=np.int32)) - - def test_block_internal(self): - idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32), - kind='block') - self.assertIsInstance(idx, sparray.BlockIndex) - self.assertEqual(idx.npoints, 2) - tm.assert_numpy_array_equal(idx.blocs, - np.array([2], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([2], dtype=np.int32)) - - idx = sparray._make_index(4, np.array([], dtype=np.int32), - kind='block') - self.assertIsInstance(idx, sparray.BlockIndex) - self.assertEqual(idx.npoints, 0) - tm.assert_numpy_array_equal(idx.blocs, - np.array([], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([], dtype=np.int32)) - - idx = sparray._make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), - kind='block') - self.assertIsInstance(idx, sparray.BlockIndex) - self.assertEqual(idx.npoints, 4) - tm.assert_numpy_array_equal(idx.blocs, - np.array([0], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([4], dtype=np.int32)) - - idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32), - kind='block') - self.assertIsInstance(idx, 
sparray.BlockIndex) - self.assertEqual(idx.npoints, 3) - tm.assert_numpy_array_equal(idx.blocs, - np.array([0, 2], dtype=np.int32)) - tm.assert_numpy_array_equal(idx.blengths, - np.array([1, 2], dtype=np.int32)) - - def test_lookup(self): - for kind in ['integer', 'block']: - idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32), - kind=kind) - self.assertEqual(idx.lookup(-1), -1) - self.assertEqual(idx.lookup(0), -1) - self.assertEqual(idx.lookup(1), -1) - self.assertEqual(idx.lookup(2), 0) - self.assertEqual(idx.lookup(3), 1) - self.assertEqual(idx.lookup(4), -1) - - idx = sparray._make_index(4, np.array([], dtype=np.int32), - kind=kind) - for i in range(-1, 5): - self.assertEqual(idx.lookup(i), -1) - - idx = sparray._make_index(4, np.array([0, 1, 2, 3], - dtype=np.int32), kind=kind) - self.assertEqual(idx.lookup(-1), -1) - self.assertEqual(idx.lookup(0), 0) - self.assertEqual(idx.lookup(1), 1) - self.assertEqual(idx.lookup(2), 2) - self.assertEqual(idx.lookup(3), 3) - self.assertEqual(idx.lookup(4), -1) - - idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32), - kind=kind) - self.assertEqual(idx.lookup(-1), -1) - self.assertEqual(idx.lookup(0), 0) - self.assertEqual(idx.lookup(1), -1) - self.assertEqual(idx.lookup(2), 1) - self.assertEqual(idx.lookup(3), 2) - self.assertEqual(idx.lookup(4), -1) - - def test_lookup_array(self): - for kind in ['integer', 'block']: - idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32), - kind=kind) - - res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) - exp = np.array([-1, -1, 0], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) - exp = np.array([-1, 0, -1, 1], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) - - idx = sparray._make_index(4, np.array([], dtype=np.int32), - kind=kind) - res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) - exp = np.array([-1, -1, -1, -1], dtype=np.int32) - - idx = 
sparray._make_index(4, np.array([0, 1, 2, 3], - dtype=np.int32), - kind=kind) - res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) - exp = np.array([-1, 0, 2], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) - exp = np.array([-1, 2, 1, 3], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) - - idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32), - kind=kind) - res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) - exp = np.array([1, -1, 2, 0], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) - - res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) - exp = np.array([-1, -1, 1, -1], dtype=np.int32) - self.assert_numpy_array_equal(res, exp) - - class TestSparseArray(tm.TestCase): _multiprocess_can_split_ = True @@ -159,6 +22,67 @@ def setUp(self): self.arr = SparseArray(self.arr_data) self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) + def test_constructor_dtype(self): + arr = SparseArray([np.nan, 1, 2, np.nan]) + self.assertEqual(arr.dtype, np.float64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) + self.assertEqual(arr.dtype, np.float64) + self.assertEqual(arr.fill_value, 0) + + arr = SparseArray([0, 1, 2, 4], dtype=np.int64) + self.assertEqual(arr.dtype, np.int64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) + self.assertEqual(arr.dtype, np.int64) + self.assertEqual(arr.fill_value, 0) + + arr = SparseArray([0, 1, 2, 4], dtype=None) + self.assertEqual(arr.dtype, np.int64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) + self.assertEqual(arr.dtype, np.int64) + self.assertEqual(arr.fill_value, 0) + + def test_constructor_spindex_dtype(self): + arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) + 
tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan])) + self.assertEqual(arr.dtype, np.float64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseArray(data=[0, 1, 2, 3], + sparse_index=IntIndex(4, [0, 1, 2, 3]), + dtype=np.int64) + exp = SparseArray([0, 1, 2, 3], dtype=np.int64) + tm.assert_sp_array_equal(arr, exp) + self.assertEqual(arr.dtype, np.int64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), + fill_value=0, dtype=np.int64) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64) + tm.assert_sp_array_equal(arr, exp) + self.assertEqual(arr.dtype, np.int64) + self.assertEqual(arr.fill_value, 0) + + arr = SparseArray(data=[0, 1, 2, 3], + sparse_index=IntIndex(4, [0, 1, 2, 3]), + dtype=None) + exp = SparseArray([0, 1, 2, 3], dtype=None) + tm.assert_sp_array_equal(arr, exp) + self.assertEqual(arr.dtype, np.int64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]), + fill_value=0, dtype=None) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) + tm.assert_sp_array_equal(arr, exp) + self.assertEqual(arr.dtype, np.int64) + self.assertEqual(arr.fill_value, 0) + def test_get_item(self): self.assertTrue(np.isnan(self.arr[1])) diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index 293e50424b075..8d7ae012d0fe9 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -8,7 +8,7 @@ from pandas import compat -from pandas._sparse import IntIndex, BlockIndex +from pandas.sparse.array import IntIndex, BlockIndex, _make_index import pandas._sparse as splib TEST_LENGTH = 20 @@ -156,26 +156,99 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): # TODO: different-length index objects -def test_lookup(): - def _check(index): - assert (index.lookup(0) == -1) - assert (index.lookup(5) == 0) - assert (index.lookup(7) == 2) - assert 
(index.lookup(8) == -1) - assert (index.lookup(9) == -1) - assert (index.lookup(10) == -1) - assert (index.lookup(11) == -1) - assert (index.lookup(12) == 3) - assert (index.lookup(17) == 8) - assert (index.lookup(18) == -1) - - bindex = BlockIndex(20, [5, 12], [3, 6]) - iindex = bindex.to_int_index() - - _check(bindex) - _check(iindex) - - # corner cases +class TestSparseIndexCommon(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_lookup(self): + for kind in ['integer', 'block']: + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + self.assertEqual(idx.lookup(-1), -1) + self.assertEqual(idx.lookup(0), -1) + self.assertEqual(idx.lookup(1), -1) + self.assertEqual(idx.lookup(2), 0) + self.assertEqual(idx.lookup(3), 1) + self.assertEqual(idx.lookup(4), -1) + + idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) + + for i in range(-1, 5): + self.assertEqual(idx.lookup(i), -1) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind=kind) + self.assertEqual(idx.lookup(-1), -1) + self.assertEqual(idx.lookup(0), 0) + self.assertEqual(idx.lookup(1), 1) + self.assertEqual(idx.lookup(2), 2) + self.assertEqual(idx.lookup(3), 3) + self.assertEqual(idx.lookup(4), -1) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), + kind=kind) + self.assertEqual(idx.lookup(-1), -1) + self.assertEqual(idx.lookup(0), 0) + self.assertEqual(idx.lookup(1), -1) + self.assertEqual(idx.lookup(2), 1) + self.assertEqual(idx.lookup(3), 2) + self.assertEqual(idx.lookup(4), -1) + + def test_lookup_array(self): + for kind in ['integer', 'block']: + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, -1, 0], dtype=np.int32) + self.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 0, -1, 1], dtype=np.int32) + self.assert_numpy_array_equal(res, exp) + + idx = 
_make_index(4, np.array([], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) + exp = np.array([-1, -1, -1, -1], dtype=np.int32) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, 0, 2], dtype=np.int32) + self.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 2, 1, 3], dtype=np.int32) + self.assert_numpy_array_equal(res, exp) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), + kind=kind) + res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) + exp = np.array([1, -1, 2, 0], dtype=np.int32) + self.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) + exp = np.array([-1, -1, 1, -1], dtype=np.int32) + self.assert_numpy_array_equal(res, exp) + + def test_lookup_basics(self): + def _check(index): + assert (index.lookup(0) == -1) + assert (index.lookup(5) == 0) + assert (index.lookup(7) == 2) + assert (index.lookup(8) == -1) + assert (index.lookup(9) == -1) + assert (index.lookup(10) == -1) + assert (index.lookup(11) == -1) + assert (index.lookup(12) == 3) + assert (index.lookup(17) == 8) + assert (index.lookup(18) == -1) + + bindex = BlockIndex(20, [5, 12], [3, 6]) + iindex = bindex.to_int_index() + + _check(bindex) + _check(iindex) + + # corner cases def test_intersect(): @@ -205,6 +278,43 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): class TestBlockIndex(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_block_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='block') + self.assertIsInstance(idx, BlockIndex) + self.assertEqual(idx.npoints, 2) + tm.assert_numpy_array_equal(idx.blocs, + np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([2], dtype=np.int32)) + + idx = _make_index(4, 
np.array([], dtype=np.int32), kind='block') + self.assertIsInstance(idx, BlockIndex) + self.assertEqual(idx.npoints, 0) + tm.assert_numpy_array_equal(idx.blocs, + np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind='block') + self.assertIsInstance(idx, BlockIndex) + self.assertEqual(idx.npoints, 4) + tm.assert_numpy_array_equal(idx.blocs, + np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([4], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind='block') + self.assertIsInstance(idx, BlockIndex) + self.assertEqual(idx.npoints, 3) + tm.assert_numpy_array_equal(idx.blocs, + np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, + np.array([1, 2], dtype=np.int32)) + def test_equals(self): index = BlockIndex(10, [0, 4], [2, 5]) @@ -244,6 +354,29 @@ def test_to_block_index(self): class TestIntIndex(tm.TestCase): + + _multiprocess_can_split_ = True + + def test_int_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind='integer') + self.assertIsInstance(idx, IntIndex) + self.assertEqual(idx.npoints, 2) + tm.assert_numpy_array_equal(idx.indices, + np.array([2, 3], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind='integer') + self.assertIsInstance(idx, IntIndex) + self.assertEqual(idx.npoints, 0) + tm.assert_numpy_array_equal(idx.indices, + np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), + kind='integer') + self.assertIsInstance(idx, IntIndex) + self.assertEqual(idx.npoints, 4) + tm.assert_numpy_array_equal(idx.indices, + np.array([0, 1, 2, 3], dtype=np.int32)) + def test_equals(self): index = IntIndex(10, [0, 1, 2, 3, 4]) self.assertTrue(index.equals(index)) diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index 
1d5b90c19decb..097bdee82a589 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -91,6 +91,23 @@ def setUp(self): self.ziseries2 = SparseSeries(arr, index=index, kind='integer', fill_value=0) + def test_constructor_dtype(self): + arr = SparseSeries([np.nan, 1, 2, np.nan]) + self.assertEqual(arr.dtype, np.float64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseSeries([np.nan, 1, 2, np.nan], fill_value=0) + self.assertEqual(arr.dtype, np.float64) + self.assertEqual(arr.fill_value, 0) + + arr = SparseSeries([0, 1, 2, 4], dtype=np.int64) + self.assertEqual(arr.dtype, np.int64) + self.assertTrue(np.isnan(arr.fill_value)) + + arr = SparseSeries([0, 1, 2, 4], fill_value=0, dtype=np.int64) + self.assertEqual(arr.dtype, np.int64) + self.assertEqual(arr.fill_value, 0) + def test_iteration_and_str(self): [x for x in self.bseries] str(self.bseries) @@ -769,6 +786,78 @@ def test_shift(self): f = lambda s: s.shift(2, freq=datetools.bday) _dense_series_compare(series, f) + def test_shift_nan(self): + # GH 12908 + orig = pd.Series([np.nan, 2, np.nan, 4, 0, np.nan, 0]) + sparse = orig.to_sparse() + + tm.assert_sp_series_equal(sparse.shift(0), orig.shift(0).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(1), orig.shift(1).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(2), orig.shift(2).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(3), orig.shift(3).to_sparse()) + + tm.assert_sp_series_equal(sparse.shift(-1), orig.shift(-1).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(-2), orig.shift(-2).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(-3), orig.shift(-3).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(-4), orig.shift(-4).to_sparse()) + + sparse = orig.to_sparse(fill_value=0) + tm.assert_sp_series_equal(sparse.shift(0), + orig.shift(0).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(1), + orig.shift(1).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(2), + 
orig.shift(2).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(3), + orig.shift(3).to_sparse(fill_value=0)) + + tm.assert_sp_series_equal(sparse.shift(-1), + orig.shift(-1).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(-2), + orig.shift(-2).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(-3), + orig.shift(-3).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(-4), + orig.shift(-4).to_sparse(fill_value=0)) + + def test_shift_dtype(self): + # GH 12908 + orig = pd.Series([1, 2, 3, 4], dtype=np.int64) + sparse = orig.to_sparse() + + tm.assert_sp_series_equal(sparse.shift(0), orig.shift(0).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(1), orig.shift(1).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(2), orig.shift(2).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(3), orig.shift(3).to_sparse()) + + tm.assert_sp_series_equal(sparse.shift(-1), orig.shift(-1).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(-2), orig.shift(-2).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(-3), orig.shift(-3).to_sparse()) + tm.assert_sp_series_equal(sparse.shift(-4), orig.shift(-4).to_sparse()) + + def test_shift_dtype_fill_value(self): + # GH 12908 + orig = pd.Series([1, 0, 0, 4], dtype=np.int64) + sparse = orig.to_sparse(fill_value=0) + + tm.assert_sp_series_equal(sparse.shift(0), + orig.shift(0).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(1), + orig.shift(1).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(2), + orig.shift(2).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(3), + orig.shift(3).to_sparse(fill_value=0)) + + tm.assert_sp_series_equal(sparse.shift(-1), + orig.shift(-1).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(-2), + orig.shift(-2).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(-3), + orig.shift(-3).to_sparse(fill_value=0)) + tm.assert_sp_series_equal(sparse.shift(-4), + 
orig.shift(-4).to_sparse(fill_value=0)) + def test_cumsum(self): result = self.bseries.cumsum() expected = self.bseries.to_dense().cumsum()