Skip to content

Commit 4873e06

Browse files
committed
BUG: Sparse concat may fill fill_value with NaN
1 parent 92b5322 commit 4873e06

File tree

10 files changed

+422
-121
lines changed

10 files changed

+422
-121
lines changed

doc/source/whatsnew/v0.18.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ These changes conform sparse handling to return the correct types and work to ma
111111
s.take([1, 2, 3])
112112

113113
- Bug in ``SparseSeries[]`` indexing with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`)
114+
- Bug in ``SparseArray[]`` where indexing with tuples is not handled properly (:issue:`12966`)
114115
- Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`)
115116
- Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`)
116117
- Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`)
@@ -126,6 +127,8 @@ These changes conform sparse handling to return the correct types and work to ma
126127
- Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`)
127128
- Bug in ``SparseArray.to_dense()`` incorrectly handles ``fill_value`` (:issue:`12797`)
128129
- Bug in ``pd.concat()`` of ``SparseSeries`` results in a dense object instead of a sparse one (:issue:`10536`)
130+
- Bug in ``pd.concat()`` of ``SparseDataFrame`` incorrectly handles ``fill_value`` (:issue:`9765`)
131+
- Bug in ``pd.concat()`` of ``SparseDataFrame`` may raise ``AttributeError`` (:issue:`12174`)
129132
- Bug in ``SparseArray.shift()`` may raise ``NameError`` or ``TypeError`` (:issue:`12908`)
130133

131134
.. _whatsnew_0181.api:

pandas/core/internals.py

+7
Original file line numberDiff line numberDiff line change
@@ -4872,6 +4872,11 @@ def is_null(self):
48724872
values = self.block.values
48734873
if self.block.is_categorical:
48744874
values_flat = values.categories
4875+
elif self.block.is_sparse:
4876+
# fill_value is not NaN and have holes
4877+
if not values._null_fill_value and values.sp_index.ngaps > 0:
4878+
return False
4879+
values_flat = values.ravel(order='K')
48754880
else:
48764881
values_flat = values.ravel(order='K')
48774882
total_len = values_flat.shape[0]
@@ -4904,6 +4909,8 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
49044909
pass
49054910
elif getattr(self.block, 'is_categorical', False):
49064911
pass
4912+
elif getattr(self.block, 'is_sparse', False):
4913+
pass
49074914
else:
49084915
missing_arr = np.empty(self.shape, dtype=empty_dtype)
49094916
missing_arr.fill(fill_value)

pandas/sparse/array.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -278,14 +278,18 @@ def __getitem__(self, key):
278278
"""
279279
if com.is_integer(key):
280280
return self._get_val_at(key)
281+
elif isinstance(key, tuple):
282+
data_slice = self.values[key]
281283
else:
282284
if isinstance(key, SparseArray):
283285
key = np.asarray(key)
286+
284287
if hasattr(key, '__len__') and len(self) != len(key):
285288
return self.take(key)
286289
else:
287290
data_slice = self.values[key]
288-
return self._constructor(data_slice)
291+
292+
return self._constructor(data_slice)
289293

290294
def __getslice__(self, i, j):
291295
if i < 0:

pandas/sparse/series.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,12 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block',
115115
if fastpath:
116116

117117
# data is an ndarray, index is defined
118-
data = SingleBlockManager(data, index, fastpath=True)
118+
119+
if not isinstance(data, SingleBlockManager):
120+
data = SingleBlockManager(data, index, fastpath=True)
119121
if copy:
120122
data = data.copy()
123+
121124
else:
122125

123126
if data is None:

pandas/sparse/tests/test_array.py

+20
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,26 @@ def test_getslice(self):
347347
exp = SparseArray(self.arr.values[:0])
348348
tm.assert_sp_array_equal(result, exp)
349349

350+
def test_getslice_tuple(self):
351+
dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])
352+
353+
sparse = SparseArray(dense)
354+
res = sparse[4:, ]
355+
exp = SparseArray(dense[4:, ])
356+
tm.assert_sp_array_equal(res, exp)
357+
358+
sparse = SparseArray(dense, fill_value=0)
359+
res = sparse[4:, ]
360+
exp = SparseArray(dense[4:, ], fill_value=0)
361+
tm.assert_sp_array_equal(res, exp)
362+
363+
with tm.assertRaises(IndexError):
364+
sparse[4:, :]
365+
366+
with tm.assertRaises(IndexError):
367+
# check numpy compat
368+
dense[4:, :]
369+
350370
def test_binary_operators(self):
351371
data1 = np.random.randn(20)
352372
data2 = np.random.randn(20)

0 commit comments

Comments
 (0)