Skip to content

BUG: SparseSeries.reindex with fill_value #12831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ These changes conform sparse handling to return the correct types and work to ma
- Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may result in a dense ``Series``, rather than a ``SparseSeries`` (:issue:`12787`)
- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
- Bug in ``SparseSeries.reindex`` incorrectly handles ``fill_value`` (:issue:`12797`)
- Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`)
- Bug in ``SparseArray.to_dense()`` incorrectly handles ``fill_value`` (:issue:`12797`)
- ``SparseArray.take`` now returns a scalar for scalar input and a ``SparseArray`` otherwise. It also now handles negative indexers by the same rule as ``Index`` (:issue:`10560`, :issue:`12796`)

.. ipython:: python
Expand Down
14 changes: 4 additions & 10 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2385,7 +2385,7 @@ def make_block_same_class(self, values, placement, sparse_index=None,
""" return a new block """
if dtype is None:
dtype = self.dtype
if fill_value is None:
if fill_value is None and not isinstance(values, SparseArray):
fill_value = self.values.fill_value

# if not isinstance(values, SparseArray) and values.ndim != self.ndim:
Expand Down Expand Up @@ -2427,11 +2427,9 @@ def fillna(self, value, limit=None, inplace=False, downcast=None,
if limit is not None:
raise NotImplementedError("specifying a limit for 'fillna' has "
"not been implemented yet")
if issubclass(self.dtype.type, np.floating):
value = float(value)
values = self.values if inplace else self.values.copy()
return [self.make_block_same_class(values=values.get_values(value),
fill_value=value,
values = values.fillna(value, downcast=downcast)
return [self.make_block_same_class(values=values,
placement=self.mgr_locs)]

def shift(self, periods, axis=0, mgr=None):
Expand Down Expand Up @@ -3843,11 +3841,7 @@ def reindex(self, new_axis, indexer=None, method=None, fill_value=None,
indexer = self.items.get_indexer_for(new_axis)

if fill_value is None:
# FIXME: is fill_value used correctly in sparse blocks?
if not self._block.is_sparse:
fill_value = self._block.fill_value
else:
fill_value = np.nan
fill_value = np.nan

new_values = algos.take_1d(values, indexer, fill_value=fill_value)

Expand Down
2 changes: 1 addition & 1 deletion pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3036,7 +3036,7 @@ def duplicated(self, keep='first'):

Returns
-------
filled : Index
filled : %(klass)s
"""

@Appender(_index_shared_docs['fillna'])
Expand Down
27 changes: 18 additions & 9 deletions pandas/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,7 @@ def to_dense(self, fill=None):
"""
Convert SparseSeries to (dense) Series
"""
values = self.values

# fill the nans
if fill is None:
fill = self.fill_value
if not com.isnull(fill):
values[com.isnull(values)] = fill

return values
return self.values

def __iter__(self):
for i in range(len(self)):
Expand Down Expand Up @@ -444,6 +436,23 @@ def _valid_sp_values(self):
mask = np.isfinite(sp_vals)
return sp_vals[mask]

@Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs)
def fillna(self, value, downcast=None):
    # NOTE: no inline docstring on purpose -- the decorator above supplies
    # the shared ``fillna`` documentation for this method.

    # Any explicit ``downcast`` request is rejected.
    if downcast is not None:
        raise NotImplementedError

    # For floating-point arrays, coerce the replacement to a Python float.
    has_float_dtype = issubclass(self.dtype.type, np.floating)
    if has_float_dtype:
        value = float(value)

    if not self._null_fill_value:
        # Non-null fill value: replace nulls inside a copy of the stored
        # values and keep the current fill_value unchanged.
        filled = self.sp_values.copy()
        null_mask = com.isnull(filled)
        filled[null_mask] = value
        return self._simple_new(filled, self.sp_index,
                                fill_value=self.fill_value)

    # Null fill value: reuse the stored values and sparse index as they are
    # and make ``value`` the new fill_value.
    return self._simple_new(self.sp_values, self.sp_index,
                            fill_value=value)

def sum(self, axis=None, dtype=None, out=None):
"""
Sum of non-NA/null values
Expand Down
33 changes: 7 additions & 26 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,23 +119,7 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None,

@property
def _constructor(self):
def wrapper(data=None, index=None, columns=None,
default_fill_value=None, kind=None, fill_value=None,
copy=False):
result = SparseDataFrame(data, index=index, columns=columns,
default_fill_value=fill_value,
default_kind=kind, copy=copy)

# fill if requested
if fill_value is not None and not isnull(fill_value):
result.fillna(fill_value, inplace=True)

# set the default_fill_value
# if default_fill_value is not None:
# result._default_fill_value = default_fill_value
return result

return wrapper
return SparseDataFrame

_constructor_sliced = SparseSeries

Expand Down Expand Up @@ -452,8 +436,8 @@ def _combine_frame(self, other, func, fill_value=None, level=None):

return self._constructor(data=new_data, index=new_index,
columns=new_columns,
default_fill_value=new_fill_value,
fill_value=new_fill_value).__finalize__(self)
default_fill_value=new_fill_value
).__finalize__(self)

def _combine_match_index(self, other, func, level=None, fill_value=None):
new_data = {}
Expand Down Expand Up @@ -483,8 +467,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None):

return self._constructor(
new_data, index=new_index, columns=self.columns,
default_fill_value=fill_value,
fill_value=self.default_fill_value).__finalize__(self)
default_fill_value=fill_value).__finalize__(self)

def _combine_match_columns(self, other, func, level=None, fill_value=None):
# patched version of DataFrame._combine_match_columns to account for
Expand All @@ -510,8 +493,7 @@ def _combine_match_columns(self, other, func, level=None, fill_value=None):

return self._constructor(
new_data, index=self.index, columns=union,
default_fill_value=self.default_fill_value,
fill_value=self.default_fill_value).__finalize__(self)
default_fill_value=self.default_fill_value).__finalize__(self)

def _combine_const(self, other, func):
new_data = {}
Expand All @@ -520,8 +502,7 @@ def _combine_const(self, other, func):

return self._constructor(
data=new_data, index=self.index, columns=self.columns,
default_fill_value=self.default_fill_value,
fill_value=self.default_fill_value).__finalize__(self)
default_fill_value=self.default_fill_value).__finalize__(self)

def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
limit=None, takeable=False):
Expand Down Expand Up @@ -715,7 +696,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False):
return self._constructor(
new_series, index=self.index, columns=self.columns,
default_fill_value=self._default_fill_value,
kind=self._default_kind).__finalize__(self)
default_kind=self._default_kind).__finalize__(self)
else:
if not broadcast:
return self._apply_standard(func, axis, reduce=reduce)
Expand Down
76 changes: 76 additions & 0 deletions pandas/sparse/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,28 @@ def test_values_asarray(self):
assert_almost_equal(self.arr.to_dense(), self.arr_data)
assert_almost_equal(self.arr.sp_values, np.asarray(self.arr))

def test_to_dense(self):
    # ``to_dense`` must return the original values for every input,
    # whether the array uses the default fill_value or ``fill_value=0``.
    inputs = [
        np.array([1, np.nan, np.nan, 3, np.nan]),
        np.array([1, np.nan, 0, 3, 0]),
        np.array([np.nan, np.nan, np.nan, np.nan, np.nan]),
    ]
    constructor_kwargs = [{}, {'fill_value': 0}]

    for expected in inputs:
        for kwargs in constructor_kwargs:
            dense = SparseArray(expected, **kwargs).to_dense()
            tm.assert_numpy_array_equal(dense, expected)


def test_getitem(self):
def _checkit(i):
assert_almost_equal(self.arr[i], self.arr.values[i])
Expand Down Expand Up @@ -466,6 +488,60 @@ def test_generator_warnings(self):
pass
assert len(w) == 0

def test_fillna(self):
    nan = np.nan
    default = {}
    zero_fill = {'fill_value': 0}

    # Each case: (input data, constructor kwargs,
    #             expected data after ``fillna(-1)``, expected fill_value)
    cases = [
        ([1, nan, nan, 3, nan], default, [1, -1, -1, 3, -1], -1),
        ([1, nan, nan, 3, nan], zero_fill, [1, -1, -1, 3, -1], 0),
        ([1, nan, 0, 3, 0], default, [1, -1, 0, 3, 0], -1),
        ([1, nan, 0, 3, 0], zero_fill, [1, -1, 0, 3, 0], 0),
        ([nan, nan, nan, nan], default, [-1, -1, -1, -1], -1),
        ([nan, nan, nan, nan], zero_fill, [-1, -1, -1, -1], 0),
        ([0, 0, 0, 0], default, [0, 0, 0, 0], -1),
        ([0, 0, 0, 0], zero_fill, [0, 0, 0, 0], 0),
    ]

    for data, kwargs, exp_data, exp_fill in cases:
        result = SparseArray(data, **kwargs).fillna(-1)
        expected = SparseArray(exp_data, fill_value=exp_fill)
        tm.assert_sp_array_equal(result, expected)


def test_fillna_overlap(self):
    # Filling with a value that is already stored must not turn that
    # stored value into the fill_value, i.e. the existing 3 stays in
    # sp_values.
    filled = SparseArray([1, np.nan, np.nan, 3, np.nan]).fillna(3)
    expected_dense = np.array([1, 3, 3, 3, 3])
    tm.assert_numpy_array_equal(filled.to_dense(), expected_dense)

    # Same fill, but on an array created with an explicit fill_value of 0.
    zero_filled = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
    filled = zero_filled.fillna(3)
    expected_sparse = SparseArray([1, 3, 3, 3, 3], fill_value=0)
    tm.assert_sp_array_equal(filled, expected_sparse)



if __name__ == '__main__':
import nose
Expand Down
Loading