Skip to content

Commit 5d8a935

Browse files
sinhrks authored and jreback committed
BUG: SparseSeries.shift may raise NameError or TypeError
Author: sinhrks <[email protected]> Closes #12908 from sinhrks/sparse_shift and squashes the following commits: 5a0adfa [sinhrks] BUG: SparseSeries.shift may raise NameError or TypeError
1 parent 6d2d6db commit 5d8a935

File tree

6 files changed

+333
-179
lines changed

6 files changed

+333
-179
lines changed

doc/source/whatsnew/v0.18.1.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ These changes conform sparse handling to return the correct types and work to ma
109109
s.take(0)
110110
s.take([1, 2, 3])
111111

112-
- Bug in ``SparseSeries.__getitem__`` with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`)
112+
- Bug in ``SparseSeries[]`` indexing with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`)
113113
- Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`)
114114
- Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`)
115115
- Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`)
@@ -119,11 +119,13 @@ These changes conform sparse handling to return the correct types and work to ma
119119
- Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`)
120120
- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
121121
- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
122+
- Bug in ``SparseSeries`` and ``SparseArray`` may have different ``dtype`` from their dense values (:issue:`12908`)
122123
- Bug in ``SparseSeries.reindex`` incorrectly handles ``fill_value`` (:issue:`12797`)
123124
- Bug in ``SparseArray.to_frame()`` results in ``DataFrame``, rather than ``SparseDataFrame`` (:issue:`9850`)
124125
- Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`)
125126
- Bug in ``SparseArray.to_dense()`` incorrectly handles ``fill_value`` (:issue:`12797`)
126127
- Bug in ``pd.concat()`` of ``SparseSeries`` results in dense (:issue:`10536`)
128+
- Bug in ``SparseSeries.shift()`` may raise ``NameError`` or ``TypeError`` (:issue:`12908`)
127129

128130
.. _whatsnew_0181.api:
129131

pandas/sparse/array.py

+6
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,12 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
165165

166166
@classmethod
167167
def _simple_new(cls, data, sp_index, fill_value):
168+
if (com.is_integer_dtype(data) and com.is_float(fill_value) and
169+
sp_index.ngaps > 0):
170+
# if float fill_value is being included in dense repr,
171+
# convert values to float
172+
data = data.astype(float)
173+
168174
result = data.view(cls)
169175

170176
if not isinstance(sp_index, SparseIndex):

pandas/sparse/series.py

+19-19
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,20 @@
2020
import pandas.core.ops as ops
2121
import pandas.index as _index
2222
import pandas.lib as lib
23+
from pandas.util.decorators import Appender
2324

24-
from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray)
25+
from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray,
26+
_make_index)
2527
from pandas._sparse import BlockIndex, IntIndex
2628
import pandas._sparse as splib
2729

2830
from pandas.sparse.scipy_sparse import (_sparse_series_to_coo,
2931
_coo_to_sparse_series)
3032

33+
34+
_shared_doc_kwargs = dict(klass='SparseSeries',
35+
axes_single_arg="{0, 'index'}")
36+
3137
# -----------------------------------------------------------------------------
3238
# Wrapper function for Series arithmetic methods
3339

@@ -633,20 +639,17 @@ def dropna(self, axis=0, inplace=False, **kwargs):
633639
dense_valid = dense_valid[dense_valid != self.fill_value]
634640
return dense_valid.to_sparse(fill_value=self.fill_value)
635641

636-
def shift(self, periods, freq=None):
637-
"""
638-
Analogous to Series.shift
639-
"""
642+
@Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
643+
def shift(self, periods, freq=None, axis=0):
644+
if periods == 0:
645+
return self.copy()
640646

641647
# no special handling of fill values yet
642648
if not isnull(self.fill_value):
643-
# TODO: kwds is not defined...should this work?
644-
dense_shifted = self.to_dense().shift(periods, freq=freq, **kwds) # noqa
645-
return dense_shifted.to_sparse(fill_value=self.fill_value,
646-
kind=self.kind)
647-
648-
if periods == 0:
649-
return self.copy()
649+
shifted = self.to_dense().shift(periods, freq=freq,
650+
axis=axis)
651+
return shifted.to_sparse(fill_value=self.fill_value,
652+
kind=self.kind)
650653

651654
if freq is not None:
652655
return self._constructor(
@@ -659,14 +662,11 @@ def shift(self, periods, freq=None):
659662
start, end = new_indices.searchsorted([0, int_index.length])
660663

661664
new_indices = new_indices[start:end]
665+
new_sp_index = _make_index(len(self), new_indices, self.sp_index)
662666

663-
new_sp_index = IntIndex(len(self), new_indices)
664-
if isinstance(self.sp_index, BlockIndex):
665-
new_sp_index = new_sp_index.to_block_index()
666-
667-
return self._constructor(self.sp_values[start:end].copy(),
668-
index=self.index, sparse_index=new_sp_index,
669-
fill_value=self.fill_value).__finalize__(self)
667+
arr = self.values._simple_new(self.sp_values[start:end].copy(),
668+
new_sp_index, fill_value=np.nan)
669+
return self._constructor(arr, index=self.index).__finalize__(self)
670670

671671
def combine_first(self, other):
672672
"""

pandas/sparse/tests/test_array.py

+62-138
Original file line numberDiff line numberDiff line change
@@ -8,148 +8,11 @@
88

99
from pandas import _np_version_under1p8
1010
from pandas.sparse.api import SparseArray
11-
import pandas.sparse.array as sparray
11+
from pandas._sparse import IntIndex
1212
from pandas.util.testing import assert_almost_equal, assertRaisesRegexp
1313
import pandas.util.testing as tm
1414

1515

16-
class TestSparseArrayIndex(tm.TestCase):
17-
18-
_multiprocess_can_split_ = True
19-
20-
def test_int_internal(self):
21-
idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
22-
kind='integer')
23-
self.assertIsInstance(idx, sparray.IntIndex)
24-
self.assertEqual(idx.npoints, 2)
25-
tm.assert_numpy_array_equal(idx.indices,
26-
np.array([2, 3], dtype=np.int32))
27-
28-
idx = sparray._make_index(4, np.array([], dtype=np.int32),
29-
kind='integer')
30-
self.assertIsInstance(idx, sparray.IntIndex)
31-
self.assertEqual(idx.npoints, 0)
32-
tm.assert_numpy_array_equal(idx.indices,
33-
np.array([], dtype=np.int32))
34-
35-
idx = sparray._make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
36-
kind='integer')
37-
self.assertIsInstance(idx, sparray.IntIndex)
38-
self.assertEqual(idx.npoints, 4)
39-
tm.assert_numpy_array_equal(idx.indices,
40-
np.array([0, 1, 2, 3], dtype=np.int32))
41-
42-
def test_block_internal(self):
43-
idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
44-
kind='block')
45-
self.assertIsInstance(idx, sparray.BlockIndex)
46-
self.assertEqual(idx.npoints, 2)
47-
tm.assert_numpy_array_equal(idx.blocs,
48-
np.array([2], dtype=np.int32))
49-
tm.assert_numpy_array_equal(idx.blengths,
50-
np.array([2], dtype=np.int32))
51-
52-
idx = sparray._make_index(4, np.array([], dtype=np.int32),
53-
kind='block')
54-
self.assertIsInstance(idx, sparray.BlockIndex)
55-
self.assertEqual(idx.npoints, 0)
56-
tm.assert_numpy_array_equal(idx.blocs,
57-
np.array([], dtype=np.int32))
58-
tm.assert_numpy_array_equal(idx.blengths,
59-
np.array([], dtype=np.int32))
60-
61-
idx = sparray._make_index(4, np.array([0, 1, 2, 3], dtype=np.int32),
62-
kind='block')
63-
self.assertIsInstance(idx, sparray.BlockIndex)
64-
self.assertEqual(idx.npoints, 4)
65-
tm.assert_numpy_array_equal(idx.blocs,
66-
np.array([0], dtype=np.int32))
67-
tm.assert_numpy_array_equal(idx.blengths,
68-
np.array([4], dtype=np.int32))
69-
70-
idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32),
71-
kind='block')
72-
self.assertIsInstance(idx, sparray.BlockIndex)
73-
self.assertEqual(idx.npoints, 3)
74-
tm.assert_numpy_array_equal(idx.blocs,
75-
np.array([0, 2], dtype=np.int32))
76-
tm.assert_numpy_array_equal(idx.blengths,
77-
np.array([1, 2], dtype=np.int32))
78-
79-
def test_lookup(self):
80-
for kind in ['integer', 'block']:
81-
idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
82-
kind=kind)
83-
self.assertEqual(idx.lookup(-1), -1)
84-
self.assertEqual(idx.lookup(0), -1)
85-
self.assertEqual(idx.lookup(1), -1)
86-
self.assertEqual(idx.lookup(2), 0)
87-
self.assertEqual(idx.lookup(3), 1)
88-
self.assertEqual(idx.lookup(4), -1)
89-
90-
idx = sparray._make_index(4, np.array([], dtype=np.int32),
91-
kind=kind)
92-
for i in range(-1, 5):
93-
self.assertEqual(idx.lookup(i), -1)
94-
95-
idx = sparray._make_index(4, np.array([0, 1, 2, 3],
96-
dtype=np.int32), kind=kind)
97-
self.assertEqual(idx.lookup(-1), -1)
98-
self.assertEqual(idx.lookup(0), 0)
99-
self.assertEqual(idx.lookup(1), 1)
100-
self.assertEqual(idx.lookup(2), 2)
101-
self.assertEqual(idx.lookup(3), 3)
102-
self.assertEqual(idx.lookup(4), -1)
103-
104-
idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32),
105-
kind=kind)
106-
self.assertEqual(idx.lookup(-1), -1)
107-
self.assertEqual(idx.lookup(0), 0)
108-
self.assertEqual(idx.lookup(1), -1)
109-
self.assertEqual(idx.lookup(2), 1)
110-
self.assertEqual(idx.lookup(3), 2)
111-
self.assertEqual(idx.lookup(4), -1)
112-
113-
def test_lookup_array(self):
114-
for kind in ['integer', 'block']:
115-
idx = sparray._make_index(4, np.array([2, 3], dtype=np.int32),
116-
kind=kind)
117-
118-
res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
119-
exp = np.array([-1, -1, 0], dtype=np.int32)
120-
self.assert_numpy_array_equal(res, exp)
121-
122-
res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
123-
exp = np.array([-1, 0, -1, 1], dtype=np.int32)
124-
self.assert_numpy_array_equal(res, exp)
125-
126-
idx = sparray._make_index(4, np.array([], dtype=np.int32),
127-
kind=kind)
128-
res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32))
129-
exp = np.array([-1, -1, -1, -1], dtype=np.int32)
130-
131-
idx = sparray._make_index(4, np.array([0, 1, 2, 3],
132-
dtype=np.int32),
133-
kind=kind)
134-
res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32))
135-
exp = np.array([-1, 0, 2], dtype=np.int32)
136-
self.assert_numpy_array_equal(res, exp)
137-
138-
res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32))
139-
exp = np.array([-1, 2, 1, 3], dtype=np.int32)
140-
self.assert_numpy_array_equal(res, exp)
141-
142-
idx = sparray._make_index(4, np.array([0, 2, 3], dtype=np.int32),
143-
kind=kind)
144-
res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32))
145-
exp = np.array([1, -1, 2, 0], dtype=np.int32)
146-
self.assert_numpy_array_equal(res, exp)
147-
148-
res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32))
149-
exp = np.array([-1, -1, 1, -1], dtype=np.int32)
150-
self.assert_numpy_array_equal(res, exp)
151-
152-
15316
class TestSparseArray(tm.TestCase):
15417

15518
_multiprocess_can_split_ = True
@@ -159,6 +22,67 @@ def setUp(self):
15922
self.arr = SparseArray(self.arr_data)
16023
self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
16124

25+
def test_constructor_dtype(self):
26+
arr = SparseArray([np.nan, 1, 2, np.nan])
27+
self.assertEqual(arr.dtype, np.float64)
28+
self.assertTrue(np.isnan(arr.fill_value))
29+
30+
arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
31+
self.assertEqual(arr.dtype, np.float64)
32+
self.assertEqual(arr.fill_value, 0)
33+
34+
arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
35+
self.assertEqual(arr.dtype, np.int64)
36+
self.assertTrue(np.isnan(arr.fill_value))
37+
38+
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
39+
self.assertEqual(arr.dtype, np.int64)
40+
self.assertEqual(arr.fill_value, 0)
41+
42+
arr = SparseArray([0, 1, 2, 4], dtype=None)
43+
self.assertEqual(arr.dtype, np.int64)
44+
self.assertTrue(np.isnan(arr.fill_value))
45+
46+
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
47+
self.assertEqual(arr.dtype, np.int64)
48+
self.assertEqual(arr.fill_value, 0)
49+
50+
def test_constructor_spindex_dtype(self):
51+
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
52+
tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan]))
53+
self.assertEqual(arr.dtype, np.float64)
54+
self.assertTrue(np.isnan(arr.fill_value))
55+
56+
arr = SparseArray(data=[0, 1, 2, 3],
57+
sparse_index=IntIndex(4, [0, 1, 2, 3]),
58+
dtype=np.int64)
59+
exp = SparseArray([0, 1, 2, 3], dtype=np.int64)
60+
tm.assert_sp_array_equal(arr, exp)
61+
self.assertEqual(arr.dtype, np.int64)
62+
self.assertTrue(np.isnan(arr.fill_value))
63+
64+
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
65+
fill_value=0, dtype=np.int64)
66+
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
67+
tm.assert_sp_array_equal(arr, exp)
68+
self.assertEqual(arr.dtype, np.int64)
69+
self.assertEqual(arr.fill_value, 0)
70+
71+
arr = SparseArray(data=[0, 1, 2, 3],
72+
sparse_index=IntIndex(4, [0, 1, 2, 3]),
73+
dtype=None)
74+
exp = SparseArray([0, 1, 2, 3], dtype=None)
75+
tm.assert_sp_array_equal(arr, exp)
76+
self.assertEqual(arr.dtype, np.int64)
77+
self.assertTrue(np.isnan(arr.fill_value))
78+
79+
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
80+
fill_value=0, dtype=None)
81+
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
82+
tm.assert_sp_array_equal(arr, exp)
83+
self.assertEqual(arr.dtype, np.int64)
84+
self.assertEqual(arr.fill_value, 0)
85+
16286
def test_get_item(self):
16387

16488
self.assertTrue(np.isnan(self.arr[1]))

0 commit comments

Comments
 (0)