Skip to content

Commit 24146cd

Browse files
committed
BUG: Sparse misc fixes
1 parent 101d81d commit 24146cd

File tree

11 files changed

+354
-165
lines changed

11 files changed

+354
-165
lines changed

doc/source/whatsnew/v0.18.1.txt

+13
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ API changes
8181
- ``CParserError`` is now a ``ValueError`` instead of just an ``Exception`` (:issue:`12551`)
8282

8383
- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`)
84+
- ``SparseArray.take`` now returns a scalar for scalar input and a ``SparseArray`` otherwise (:issue:`10560`)
85+
86+
.. ipython:: python
87+
88+
s = pd.SparseArray([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
89+
s.take(0)
90+
s.take([1, 2, 3])
8491

8592
.. _whatsnew_0181.apply_resample:
8693

@@ -211,3 +218,9 @@ Bug Fixes
211218
- Bug in ``.describe()`` where categorical column information was reset (:issue:`11558`)
212219
- Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`)
213220
- ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
221+
222+
223+
- Bug in ``SparseSeries.loc[]`` where list-like input raised ``TypeError`` (:issue:`10560`)
224+
- Bug in ``SparseSeries.iloc[]`` where scalar input could raise ``IndexError`` (:issue:`10560`)
225+
- Bug in ``SparseSeries.loc[]`` and ``.iloc[]`` where a ``slice`` returned a ``SparseArray`` rather than a ``SparseSeries`` (:issue:`10560`)
226+
- Bug in ``SparseSeries.__repr__`` where ``TypeError`` was raised when the series is longer than ``max_rows`` (:issue:`10560`)

pandas/io/tests/test_pickle.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import pandas as pd
1111
from pandas import Index
1212
from pandas.compat import u
13-
from pandas.sparse.tests import test_sparse
1413
from pandas.util.misc import is_little_endian
1514
import pandas
1615
import pandas.util.testing as tm
@@ -46,7 +45,7 @@ def compare_element(self, result, expected, typ, version=None):
4645
return
4746

4847
if typ.startswith('sp_'):
49-
comparator = getattr(test_sparse, "assert_%s_equal" % typ)
48+
comparator = getattr(tm, "assert_%s_equal" % typ)
5049
comparator(result, expected, exact_indices=False)
5150
else:
5251
comparator = getattr(tm, "assert_%s_equal" %

pandas/sparse/array.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,7 @@ def __getitem__(self, key):
280280
if isinstance(key, SparseArray):
281281
key = np.asarray(key)
282282
if hasattr(key, '__len__') and len(self) != len(key):
283-
indices = self.sp_index
284-
if hasattr(indices, 'to_int_index'):
285-
indices = indices.to_int_index()
286-
data_slice = self.values.take(indices.indices)[key]
283+
return self.take(key)
287284
else:
288285
data_slice = self.values[key]
289286
return self._constructor(data_slice)
@@ -320,6 +317,11 @@ def take(self, indices, axis=0):
320317
"""
321318
if axis:
322319
raise ValueError("axis must be 0, input was {0}".format(axis))
320+
321+
if com.is_integer(indices):
322+
# return scalar
323+
return self[indices]
324+
323325
indices = np.atleast_1d(np.asarray(indices, dtype=int))
324326

325327
# allow -1 to indicate missing values
@@ -344,7 +346,7 @@ def take(self, indices, axis=0):
344346
result = np.empty(len(indices))
345347
result.fill(self.fill_value)
346348

347-
return result
349+
return self._constructor(result)
348350

349351
def __setitem__(self, key, value):
350352
# if com.is_integer(key):

pandas/sparse/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -543,9 +543,10 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
543543
continue
544544

545545
values = series.values
546+
# .take returns SparseArray
546547
new = values.take(indexer)
547-
548548
if need_mask:
549+
new = new.values
549550
np.putmask(new, mask, fill_value)
550551

551552
new_series[col] = new

pandas/sparse/series.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -354,10 +354,33 @@ def _set_subtyp(self, is_all_dates):
354354
else:
355355
object.__setattr__(self, '_subtyp', 'sparse_series')
356356

357+
def _ixs(self, i, axis=0):
358+
"""
359+
Return the i-th value or values in the SparseSeries by location
360+
361+
Parameters
362+
----------
363+
i : int, slice, or sequence of integers
364+
365+
Returns
366+
-------
367+
value : scalar (int) or Series (slice, sequence)
368+
"""
369+
label = self.index[i]
370+
if isinstance(label, Index):
371+
return self.take(i, axis=axis, convert=True)
372+
else:
373+
return self._get_val_at(i)
374+
357375
def _get_val_at(self, loc):
358376
""" forward to the array """
359377
return self.block.values._get_val_at(loc)
360378

379+
def _slice(self, slobj, axis=0, kind=None):
380+
slobj = self.index._convert_slice_indexer(slobj,
381+
kind=kind or 'getitem')
382+
return self._get_values(slobj)
383+
361384
def __getitem__(self, key):
362385
"""
363386
@@ -382,6 +405,13 @@ def __getitem__(self, key):
382405
new_index = Index(self.index.view(ndarray)[key])
383406
return self._constructor(dataSlice, index=new_index).__finalize__(self)
384407

408+
def _get_values(self, indexer):
409+
try:
410+
return self._constructor(self._data.get_slice(indexer),
411+
fastpath=True).__finalize__(self)
412+
except Exception:
413+
return self[indexer]
414+
385415
def _set_with_engine(self, key, value):
386416
return self.set_value(key, value)
387417

@@ -517,7 +547,8 @@ def copy(self, deep=True):
517547
return self._constructor(new_data, sparse_index=self.sp_index,
518548
fill_value=self.fill_value).__finalize__(self)
519549

520-
def reindex(self, index=None, method=None, copy=True, limit=None):
550+
def reindex(self, index=None, method=None, copy=True, limit=None,
551+
**kwargs):
521552
"""
522553
Conform SparseSeries to new Index
523554

pandas/sparse/tests/test_array.py

+28-16
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,6 @@
1111
import pandas.util.testing as tm
1212

1313

14-
def assert_sp_array_equal(left, right):
15-
assert_almost_equal(left.sp_values, right.sp_values)
16-
assert (left.sp_index.equals(right.sp_index))
17-
if np.isnan(left.fill_value):
18-
assert (np.isnan(right.fill_value))
19-
else:
20-
assert (left.fill_value == right.fill_value)
21-
22-
2314
class TestSparseArray(tm.TestCase):
2415
_multiprocess_can_split_ = True
2516

@@ -29,11 +20,32 @@ def setUp(self):
2920
self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
3021

3122
def test_get_item(self):
23+
24+
self.assertTrue(np.isnan(self.arr[1]))
25+
self.assertEqual(self.arr[2], 1)
26+
self.assertEqual(self.arr[7], 5)
27+
28+
self.assertEqual(self.zarr[0], 0)
29+
self.assertEqual(self.zarr[2], 1)
30+
self.assertEqual(self.zarr[7], 5)
31+
3232
errmsg = re.compile("bounds")
3333
assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[11])
3434
assertRaisesRegexp(IndexError, errmsg, lambda: self.arr[-11])
3535
self.assertEqual(self.arr[-1], self.arr[len(self.arr) - 1])
3636

37+
def test_take(self):
38+
self.assertTrue(np.isnan(self.arr.take(0)))
39+
self.assertTrue(np.isscalar(self.arr.take(2)))
40+
self.assertEqual(self.arr.take(2), np.take(self.arr_data, 2))
41+
self.assertEqual(self.arr.take(6), np.take(self.arr_data, 6))
42+
43+
tm.assert_sp_array_equal(self.arr.take([2, 3]),
44+
SparseArray(np.take(self.arr_data, [2, 3])))
45+
tm.assert_sp_array_equal(self.arr.take([0, 1, 2]),
46+
SparseArray(np.take(self.arr_data,
47+
[0, 1, 2])))
48+
3749
def test_bad_take(self):
3850
assertRaisesRegexp(IndexError, "bounds", lambda: self.arr.take(11))
3951
self.assertRaises(IndexError, lambda: self.arr.take(-11))
@@ -96,20 +108,20 @@ def _checkit(i):
96108
def test_getslice(self):
97109
result = self.arr[:-3]
98110
exp = SparseArray(self.arr.values[:-3])
99-
assert_sp_array_equal(result, exp)
111+
tm.assert_sp_array_equal(result, exp)
100112

101113
result = self.arr[-4:]
102114
exp = SparseArray(self.arr.values[-4:])
103-
assert_sp_array_equal(result, exp)
115+
tm.assert_sp_array_equal(result, exp)
104116

105117
# two corner cases from Series
106118
result = self.arr[-12:]
107119
exp = SparseArray(self.arr)
108-
assert_sp_array_equal(result, exp)
120+
tm.assert_sp_array_equal(result, exp)
109121

110122
result = self.arr[:-12]
111123
exp = SparseArray(self.arr.values[:0])
112-
assert_sp_array_equal(result, exp)
124+
tm.assert_sp_array_equal(result, exp)
113125

114126
def test_binary_operators(self):
115127
data1 = np.random.randn(20)
@@ -134,11 +146,11 @@ def _check_op(op, first, second):
134146

135147
res2 = op(first, second.values)
136148
tm.assertIsInstance(res2, SparseArray)
137-
assert_sp_array_equal(res, res2)
149+
tm.assert_sp_array_equal(res, res2)
138150

139151
res3 = op(first.values, second)
140152
tm.assertIsInstance(res3, SparseArray)
141-
assert_sp_array_equal(res, res3)
153+
tm.assert_sp_array_equal(res, res3)
142154

143155
res4 = op(first, 4)
144156
tm.assertIsInstance(res4, SparseArray)
@@ -169,7 +181,7 @@ def _check_inplace_op(op):
169181
def test_pickle(self):
170182
def _check_roundtrip(obj):
171183
unpickled = self.round_trip_pickle(obj)
172-
assert_sp_array_equal(unpickled, obj)
184+
tm.assert_sp_array_equal(unpickled, obj)
173185

174186
_check_roundtrip(self.arr)
175187
_check_roundtrip(self.zarr)

pandas/sparse/tests/test_indexing.py

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# pylint: disable-msg=E1101,W0612
2+
3+
import nose # noqa
4+
import numpy as np
5+
import pandas as pd
6+
import pandas.util.testing as tm
7+
8+
9+
class TestSparseSeriesIndexing(tm.TestCase):
10+
11+
_multiprocess_can_split_ = True
12+
13+
def test_loc(self):
14+
orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
15+
sparse = orig.to_sparse()
16+
17+
self.assertEqual(sparse.loc[0], 1)
18+
self.assertTrue(np.isnan(sparse.loc[1]))
19+
20+
result = sparse.loc[[1, 3, 4]]
21+
exp = orig.loc[[1, 3, 4]].to_sparse()
22+
tm.assert_sp_series_equal(result, exp)
23+
24+
# exceeds the bounds
25+
result = sparse.loc[[1, 3, 4, 5]]
26+
exp = orig.loc[[1, 3, 4, 5]].to_sparse()
27+
tm.assert_sp_series_equal(result, exp)
28+
# padded with NaN
29+
self.assertTrue(np.isnan(result[-1]))
30+
31+
# dense array
32+
result = sparse.loc[orig % 2 == 1]
33+
exp = orig.loc[orig % 2 == 1].to_sparse()
34+
tm.assert_sp_series_equal(result, exp)
35+
36+
# sparse array (actually it coerces to normal Series)
37+
result = sparse.loc[sparse % 2 == 1]
38+
exp = orig.loc[orig % 2 == 1].to_sparse()
39+
tm.assert_sp_series_equal(result, exp)
40+
41+
def test_loc_index(self):
42+
orig = pd.Series([1, np.nan, np.nan, 3, np.nan], index=list('ABCDE'))
43+
sparse = orig.to_sparse()
44+
45+
self.assertEqual(sparse.loc['A'], 1)
46+
self.assertTrue(np.isnan(sparse.loc['B']))
47+
48+
result = sparse.loc[['A', 'C', 'D']]
49+
exp = orig.loc[['A', 'C', 'D']].to_sparse()
50+
tm.assert_sp_series_equal(result, exp)
51+
52+
# dense array
53+
result = sparse.loc[orig % 2 == 1]
54+
exp = orig.loc[orig % 2 == 1].to_sparse()
55+
tm.assert_sp_series_equal(result, exp)
56+
57+
# sparse array (actually it coerces to normal Series)
58+
result = sparse.loc[sparse % 2 == 1]
59+
exp = orig.loc[orig % 2 == 1].to_sparse()
60+
tm.assert_sp_series_equal(result, exp)
61+
62+
def test_loc_slice(self):
63+
orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
64+
sparse = orig.to_sparse()
65+
tm.assert_sp_series_equal(sparse.loc[2:], orig.loc[2:].to_sparse())
66+
67+
def test_iloc(self):
68+
orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
69+
sparse = orig.to_sparse()
70+
71+
self.assertEqual(sparse.iloc[3], 3)
72+
self.assertTrue(np.isnan(sparse.iloc[2]))
73+
74+
result = sparse.iloc[[1, 3, 4]]
75+
exp = orig.iloc[[1, 3, 4]].to_sparse()
76+
tm.assert_sp_series_equal(result, exp)
77+
78+
with tm.assertRaises(IndexError):
79+
sparse.iloc[[1, 3, 5]]
80+
81+
def test_iloc_slice(self):
82+
orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
83+
sparse = orig.to_sparse()
84+
tm.assert_sp_series_equal(sparse.iloc[2:], orig.iloc[2:].to_sparse())

0 commit comments

Comments
 (0)