Skip to content

Commit a23a136

Browse files
sinhrks authored and jreback committed
BUG: SparseSeries.reindex with fill_value
closes #12797 Author: sinhrks <[email protected]> Closes #12831 from sinhrks/sparse_reindex and squashes the following commits: bdd8e16 [sinhrks] BUG: SparseSeries.reindex with fill_value
1 parent 979e31d commit a23a136

File tree

10 files changed

+332
-190
lines changed

10 files changed

+332
-190
lines changed

doc/source/whatsnew/v0.18.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ These changes conform sparse handling to return the correct types and work to ma
8080
- Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may result in dense ``Series``, rather than ``SparseSeries`` (:issue:`12787`)
8181
- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
8282
- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
83+
- Bug in ``SparseSeries.reindex`` incorrectly handles ``fill_value`` (:issue:`12797`)
8384
- Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`)
85+
- Bug in ``SparseArray.to_dense()`` incorrectly handles ``fill_value`` (:issue:`12797`)
8486
- ``SparseArray.take`` now returns a scalar for scalar input and a ``SparseArray`` otherwise. It also now handles negative indexers following the same rules as ``Index`` (:issue:`10560`, :issue:`12796`)
8587

8688
.. ipython:: python

pandas/core/internals.py

+4-10
Original file line numberDiff line numberDiff line change
@@ -2385,7 +2385,7 @@ def make_block_same_class(self, values, placement, sparse_index=None,
23852385
""" return a new block """
23862386
if dtype is None:
23872387
dtype = self.dtype
2388-
if fill_value is None:
2388+
if fill_value is None and not isinstance(values, SparseArray):
23892389
fill_value = self.values.fill_value
23902390

23912391
# if not isinstance(values, SparseArray) and values.ndim != self.ndim:
@@ -2427,11 +2427,9 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
24272427
if limit is not None:
24282428
raise NotImplementedError("specifying a limit for 'fillna' has "
24292429
"not been implemented yet")
2430-
if issubclass(self.dtype.type, np.floating):
2431-
value = float(value)
24322430
values = self.values if inplace else self.values.copy()
2433-
return [self.make_block_same_class(values=values.get_values(value),
2434-
fill_value=value,
2431+
values = values.fillna(value, downcast=downcast)
2432+
return [self.make_block_same_class(values=values,
24352433
placement=self.mgr_locs)]
24362434

24372435
def shift(self, periods, axis=0, mgr=None):
@@ -3843,11 +3841,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None,
38433841
indexer = self.items.get_indexer_for(new_axis)
38443842

38453843
if fill_value is None:
3846-
# FIXME: is fill_value used correctly in sparse blocks?
3847-
if not self._block.is_sparse:
3848-
fill_value = self._block.fill_value
3849-
else:
3850-
fill_value = np.nan
3844+
fill_value = np.nan
38513845

38523846
new_values = algos.take_1d(values, indexer, fill_value=fill_value)
38533847

pandas/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3036,7 +3036,7 @@ def duplicated(self, keep='first'):
30363036
30373037
Returns
30383038
-------
3039-
filled : Index
3039+
filled : %(klass)s
30403040
"""
30413041

30423042
@Appender(_index_shared_docs['fillna'])

pandas/sparse/array.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -271,15 +271,7 @@ def to_dense(self, fill=None):
271271
"""
272272
Convert SparseSeries to (dense) Series
273273
"""
274-
values = self.values
275-
276-
# fill the nans
277-
if fill is None:
278-
fill = self.fill_value
279-
if not com.isnull(fill):
280-
values[com.isnull(values)] = fill
281-
282-
return values
274+
return self.values
283275

284276
def __iter__(self):
285277
for i in range(len(self)):
@@ -444,6 +436,23 @@ def _valid_sp_values(self):
444436
mask = np.isfinite(sp_vals)
445437
return sp_vals[mask]
446438

439+
@Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs)
440+
def fillna(self, value, downcast=None):
441+
if downcast is not None:
442+
raise NotImplementedError
443+
444+
if issubclass(self.dtype.type, np.floating):
445+
value = float(value)
446+
447+
if self._null_fill_value:
448+
return self._simple_new(self.sp_values, self.sp_index,
449+
fill_value=value)
450+
else:
451+
new_values = self.sp_values.copy()
452+
new_values[com.isnull(new_values)] = value
453+
return self._simple_new(new_values, self.sp_index,
454+
fill_value=self.fill_value)
455+
447456
def sum(self, axis=None, dtype=None, out=None):
448457
"""
449458
Sum of non-NA/null values

pandas/sparse/frame.py

+7-26
Original file line numberDiff line numberDiff line change
@@ -119,23 +119,7 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None,
119119

120120
@property
121121
def _constructor(self):
122-
def wrapper(data=None, index=None, columns=None,
123-
default_fill_value=None, kind=None, fill_value=None,
124-
copy=False):
125-
result = SparseDataFrame(data, index=index, columns=columns,
126-
default_fill_value=fill_value,
127-
default_kind=kind, copy=copy)
128-
129-
# fill if requested
130-
if fill_value is not None and not isnull(fill_value):
131-
result.fillna(fill_value, inplace=True)
132-
133-
# set the default_fill_value
134-
# if default_fill_value is not None:
135-
# result._default_fill_value = default_fill_value
136-
return result
137-
138-
return wrapper
122+
return SparseDataFrame
139123

140124
_constructor_sliced = SparseSeries
141125

@@ -452,8 +436,8 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
452436

453437
return self._constructor(data=new_data, index=new_index,
454438
columns=new_columns,
455-
default_fill_value=new_fill_value,
456-
fill_value=new_fill_value).__finalize__(self)
439+
default_fill_value=new_fill_value
440+
).__finalize__(self)
457441

458442
def _combine_match_index(self, other, func, level=None, fill_value=None):
459443
new_data = {}
@@ -483,8 +467,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None):
483467

484468
return self._constructor(
485469
new_data, index=new_index, columns=self.columns,
486-
default_fill_value=fill_value,
487-
fill_value=self.default_fill_value).__finalize__(self)
470+
default_fill_value=fill_value).__finalize__(self)
488471

489472
def _combine_match_columns(self, other, func, level=None, fill_value=None):
490473
# patched version of DataFrame._combine_match_columns to account for
@@ -510,8 +493,7 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None):
510493

511494
return self._constructor(
512495
new_data, index=self.index, columns=union,
513-
default_fill_value=self.default_fill_value,
514-
fill_value=self.default_fill_value).__finalize__(self)
496+
default_fill_value=self.default_fill_value).__finalize__(self)
515497

516498
def _combine_const(self, other, func):
517499
new_data = {}
@@ -520,8 +502,7 @@ def _combine_const(self, other, func):
520502

521503
return self._constructor(
522504
data=new_data, index=self.index, columns=self.columns,
523-
default_fill_value=self.default_fill_value,
524-
fill_value=self.default_fill_value).__finalize__(self)
505+
default_fill_value=self.default_fill_value).__finalize__(self)
525506

526507
def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
527508
limit=None, takeable=False):
@@ -715,7 +696,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False):
715696
return self._constructor(
716697
new_series, index=self.index, columns=self.columns,
717698
default_fill_value=self._default_fill_value,
718-
kind=self._default_kind).__finalize__(self)
699+
default_kind=self._default_kind).__finalize__(self)
719700
else:
720701
if not broadcast:
721702
return self._apply_standard(func, axis, reduce=reduce)

pandas/sparse/tests/test_array.py

+76
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,28 @@ def test_values_asarray(self):
366366
assert_almost_equal(self.arr.to_dense(), self.arr_data)
367367
assert_almost_equal(self.arr.sp_values, np.asarray(self.arr))
368368

369+
def test_to_dense(self):
370+
vals = np.array([1, np.nan, np.nan, 3, np.nan])
371+
res = SparseArray(vals).to_dense()
372+
tm.assert_numpy_array_equal(res, vals)
373+
374+
res = SparseArray(vals, fill_value=0).to_dense()
375+
tm.assert_numpy_array_equal(res, vals)
376+
377+
vals = np.array([1, np.nan, 0, 3, 0])
378+
res = SparseArray(vals).to_dense()
379+
tm.assert_numpy_array_equal(res, vals)
380+
381+
res = SparseArray(vals, fill_value=0).to_dense()
382+
tm.assert_numpy_array_equal(res, vals)
383+
384+
vals = np.array([np.nan, np.nan, np.nan, np.nan, np.nan])
385+
res = SparseArray(vals).to_dense()
386+
tm.assert_numpy_array_equal(res, vals)
387+
388+
res = SparseArray(vals, fill_value=0).to_dense()
389+
tm.assert_numpy_array_equal(res, vals)
390+
369391
def test_getitem(self):
370392
def _checkit(i):
371393
assert_almost_equal(self.arr[i], self.arr.values[i])
@@ -466,6 +488,60 @@ def test_generator_warnings(self):
466488
pass
467489
assert len(w) == 0
468490

491+
def test_fillna(self):
492+
s = SparseArray([1, np.nan, np.nan, 3, np.nan])
493+
res = s.fillna(-1)
494+
exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1)
495+
tm.assert_sp_array_equal(res, exp)
496+
497+
s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
498+
res = s.fillna(-1)
499+
exp = SparseArray([1, -1, -1, 3, -1], fill_value=0)
500+
tm.assert_sp_array_equal(res, exp)
501+
502+
s = SparseArray([1, np.nan, 0, 3, 0])
503+
res = s.fillna(-1)
504+
exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1)
505+
tm.assert_sp_array_equal(res, exp)
506+
507+
s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
508+
res = s.fillna(-1)
509+
exp = SparseArray([1, -1, 0, 3, 0], fill_value=0)
510+
tm.assert_sp_array_equal(res, exp)
511+
512+
s = SparseArray([np.nan, np.nan, np.nan, np.nan])
513+
res = s.fillna(-1)
514+
exp = SparseArray([-1, -1, -1, -1], fill_value=-1)
515+
tm.assert_sp_array_equal(res, exp)
516+
517+
s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
518+
res = s.fillna(-1)
519+
exp = SparseArray([-1, -1, -1, -1], fill_value=0)
520+
tm.assert_sp_array_equal(res, exp)
521+
522+
s = SparseArray([0, 0, 0, 0])
523+
res = s.fillna(-1)
524+
exp = SparseArray([0, 0, 0, 0], fill_value=-1)
525+
tm.assert_sp_array_equal(res, exp)
526+
527+
s = SparseArray([0, 0, 0, 0], fill_value=0)
528+
res = s.fillna(-1)
529+
exp = SparseArray([0, 0, 0, 0], fill_value=0)
530+
tm.assert_sp_array_equal(res, exp)
531+
532+
def test_fillna_overlap(self):
533+
s = SparseArray([1, np.nan, np.nan, 3, np.nan])
534+
# filling with existing value doesn't replace existing value with
535+
# fill_value, i.e. existing 3 remains in sp_values
536+
res = s.fillna(3)
537+
exp = np.array([1, 3, 3, 3, 3])
538+
tm.assert_numpy_array_equal(res.to_dense(), exp)
539+
540+
s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
541+
res = s.fillna(3)
542+
exp = SparseArray([1, 3, 3, 3, 3], fill_value=0)
543+
tm.assert_sp_array_equal(res, exp)
544+
469545

470546
if __name__ == '__main__':
471547
import nose

0 commit comments

Comments
 (0)