ENH: SparseDataFrame/SparseSeries value assignment #17785

Closed · wants to merge 6 commits
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
@@ -175,6 +175,7 @@ Other Enhancements
(:issue:`21627`)
- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`)
- :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`)
+- :class:`SparseDataFrame` and :class:`SparseSeries` support value assignment (:issue:`21818`)
- :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`)
- :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`)
- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`).
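For context, a minimal sketch of the behavior this whatsnew entry enables (a hypothetical session, assuming a build that includes this branch):

```python
import pandas as pd

s = pd.SparseSeries([1.0, 0.0, 0.0, 2.0], fill_value=0.0)

# Assigning to a position already held in the sparse index swaps
# the value in place.
s[0] = 5.0

# Assigning through a list of keys densifies the data in the interim
# and emits a PerformanceWarning before returning to sparse.
s[[1, 2]] = 3.0
```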
16 changes: 6 additions & 10 deletions pandas/core/frame.py
@@ -785,11 +785,9 @@ def iterrows(self):
iteritems : Iterate over (column name, Series) pairs.

"""
-        columns = self.columns
-        klass = self._constructor_sliced
-        for k, v in zip(self.index, self.values):
-            s = klass(v, index=columns, name=k)
-            yield k, s
+        iloc = self.iloc
+        for i, k in enumerate(self.index):
+            yield k, iloc[i]
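The rewritten loop slices each row through ``iloc`` instead of ``zip(self.index, self.values)``; calling ``.values`` would consolidate the frame into one dense 2D ndarray up front, which defeats sparse storage. A rough illustration of the intent (hypothetical data):

```python
import pandas as pd

sdf = pd.SparseDataFrame({'a': [1.0, 0.0], 'b': [0.0, 2.0]},
                         default_fill_value=0.0)

# Each row is built on demand from a positional slice; the frame is
# never densified as a whole.
for label, row in sdf.iterrows():
    print(label, list(row))
```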

def itertuples(self, index=True, name="Pandas"):
"""
@@ -2550,9 +2548,7 @@ def set_value(self, index, col, value, takeable=False):

Returns
-------
-        frame : DataFrame
-            If label pair is contained, will be reference to calling DataFrame,
-            otherwise a new object
+        self : DataFrame
"""
warnings.warn("set_value is deprecated and will be removed "
"in a future release. Please use "
@@ -2765,7 +2761,7 @@ def _getitem_multilevel(self, key):
return self._get_item_cache(key)

def _getitem_frame(self, key):
-        if key.values.size and not is_bool_dtype(key.values):
+        if key.size and not key.dtypes.map(is_bool_dtype).all():
raise ValueError('Must pass DataFrame with boolean values only')
return self.where(key)

@@ -3153,7 +3149,7 @@ def _setitem_frame(self, key, value):
)
key = self._constructor(key, **self._construct_axes_dict())

-        if key.values.size and not is_bool_dtype(key.values):
+        if key.size and not key.dtypes.map(is_bool_dtype).all():
raise TypeError(
'Must pass DataFrame or 2-d ndarray with boolean values only'
)
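Both mask checks above now test dtypes column by column rather than calling ``key.values``, which would consolidate (and, for sparse frames, densify) the whole key before a single dtype test. A small sketch of the new predicate on hypothetical data:

```python
import pandas as pd
from pandas.api.types import is_bool_dtype

key = pd.DataFrame({'a': [True, False], 'b': [False, True]})

# Per-column boolean check; no consolidated 2D array is materialized.
assert key.size and key.dtypes.map(is_bool_dtype).all()
```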
71 changes: 71 additions & 0 deletions pandas/core/internals/blocks.py
@@ -921,6 +921,9 @@ def _is_empty_indexer(indexer):
if _is_empty_indexer(indexer):
pass

+        elif is_sparse(values):
+            values = values.set_values(indexer, value)

# setting a single element for each dim and with a rhs that could
# be say a list
# GH 6043
@@ -3154,6 +3157,17 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
return self.make_block_same_class(values=values,
placement=self.mgr_locs)

+    def _can_hold_element(self, element):
+        return np.can_cast(np.asarray(element).dtype, self.sp_values.dtype)
+
+    def _try_coerce_result(self, result):
+        if (isinstance(result, np.ndarray) and
+                np.ndim(result) == 1 and
+                not is_sparse(result)):
+            result = SparseArray(result, kind=self.kind,
+                                 fill_value=self.fill_value)
+        return result
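``_can_hold_element`` leans on NumPy's casting rules against the dtype of the underlying ``sp_values``; roughly, for a float64-backed block:

```python
import numpy as np

sp_values = np.array([1.0, 2.0])  # stand-in for self.sp_values

# An integer scalar casts safely to float64; a string does not.
assert np.can_cast(np.asarray(3).dtype, sp_values.dtype)
assert not np.can_cast(np.asarray('x').dtype, sp_values.dtype)
```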

def __len__(self):
try:
return self.sp_index.length
@@ -3246,6 +3260,63 @@ def sparse_reindex(self, new_index):
return self.make_block_same_class(values, sparse_index=new_index,
placement=self.mgr_locs)

+    def where(self, other, cond, align=True, errors='raise',
+              try_cast=False, axis=0, transpose=False, mgr=None):
+        """
+        evaluate the block; return result block(s) from the result
+
+        Parameters
+        ----------
+        other : a ndarray/object
+        cond : the condition to respect
+        align : boolean, perform alignment on other/cond
+        errors : str, {'raise', 'ignore'}, default 'raise'
+            - ``raise`` : allow exceptions to be raised
+            - ``ignore`` : suppress exceptions. On error return original object
+        axis : int
+        transpose : boolean
+            Set to True if self is stored with axes reversed
+
+        Returns
+        -------
+        a new sparse block(s), the result of the func
+        """
+        cond = getattr(cond, 'values', cond)
+        # For SparseBlock, self.values is always 1D.
+        # If cond was a frame, its 2D values would incorrectly broadcast
+        # later on.
+        if self.values.ndim == 1 and any(ax == 1 for ax in cond.shape):
+            cond = cond.ravel()
+
+        return super(SparseBlock, self).where(
+            other, cond, align=align, errors=errors, try_cast=try_cast,
+            axis=axis, transpose=transpose, mgr=mgr)
+
+    def putmask(self, mask, new, align=True, inplace=False, axis=0,
+                transpose=False, mgr=None):
+        """
+        putmask the data to the block; we must be a single block and not
+        generate other blocks
+
+        return the resulting block
+
+        Parameters
+        ----------
+        mask : the condition to respect
+        new : a ndarray/object
+        align : boolean, perform alignment on other/cond, default is True
+        inplace : perform inplace modification, default is False
+
+        Returns
+        -------
+        a new block, the result of the putmask
+        """
+        _, _, new, _ = self._try_coerce_args(self.values, new)
+        indexer = mask.to_dense().values.ravel().nonzero()[0]
+        block = self.setitem(indexer, new)
+        return [block]
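``putmask`` reduces to ``setitem`` by first turning the boolean mask into positional indices; a sketch of that conversion with a hypothetical sparse mask:

```python
import pandas as pd

mask = pd.SparseSeries([False, True, True], fill_value=False)

# Densify the mask and collect the positions to assign, mirroring
# the indexer line above.
indexer = mask.to_dense().values.ravel().nonzero()[0]
print(indexer)  # [1 2]
```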


# -----------------------------------------------------------------
# Constructor Helpers
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/series.py
@@ -1071,9 +1071,7 @@ def set_value(self, label, value, takeable=False):

Returns
-------
-        series : Series
-            If label is contained, will be reference to calling Series,
-            otherwise a new object
+        self : Series
"""
warnings.warn("set_value is deprecated and will be removed "
"in a future release. Please use "
52 changes: 52 additions & 0 deletions pandas/core/sparse/array.py
@@ -37,6 +37,7 @@
import pandas.core.algorithms as algos
import pandas.core.ops as ops
import pandas.io.formats.printing as printing
+from pandas.errors import PerformanceWarning
from pandas.util._decorators import Appender
from pandas.core.indexes.base import _index_shared_docs

@@ -369,6 +370,53 @@ def get_values(self, fill=None):
""" return a dense representation """
return self.to_dense(fill=fill)

+    def set_values(self, indexer, value):
+        """
+        Return new SparseArray with indexed values set to `value`.
+
+        Returns
+        -------
+        SparseArray
+            A new sparse array with indexer positions filled with value.
+        """
+        # If indexer is not a single int position, easiest to handle via dense
+        if not is_scalar(indexer):
+            warnings.warn(
+                'Setting SparseSeries/Array values is inefficient when '
+                'indexing with multiple keys because the whole series '
+                'is made dense in the interim.',
+                PerformanceWarning, stacklevel=2)
+
+            values = self.to_dense()
+            values[indexer] = value
+            return SparseArray(values, kind=self.kind,
+                               fill_value=self.fill_value)
+
+        # If the position is already in the sparse index, just set the
+        # value in place
+        idx = self.sp_index.lookup(indexer)
+        if idx != -1:
+            self.sp_values[idx] = value
+            return self
+
+        warnings.warn(
+            'Setting new SparseSeries values is inefficient '
+            '(a copy of data is made).', PerformanceWarning, stacklevel=2)
+
+        # Otherwise, construct a new array and insert the new value in the
+        # correct position
+        indices = self.sp_index.to_int_index().indices
+        pos = np.searchsorted(indices, indexer)
+
+        indices = np.insert(indices, pos, indexer)
+        sp_values = np.insert(self.sp_values, pos, value)
+
+        # Length can be increased when adding a new value into index
+        length = max(self.sp_index.length, indexer + 1)
+        sp_index = _make_index(length, indices, self.kind)
+
+        return SparseArray(sp_values, sparse_index=sp_index,
+                           fill_value=self.fill_value)
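A usage sketch covering the three paths (in-place hit on the sparse index, ``searchsorted`` insertion of a new position, and the dense round-trip for array indexers), assuming a build with this branch:

```python
import numpy as np
from pandas import SparseArray

arr = SparseArray([1.0, np.nan, 3.0])  # positions 0 and 2 are held

# Position 0 is already in the sparse index: value swapped in place.
arr = arr.set_values(0, 9.0)

# Position 1 holds the fill value: a new index entry is inserted at
# the right spot via np.searchsorted; a new SparseArray is returned.
arr = arr.set_values(1, 2.0)

# Array indexers densify in the interim and emit PerformanceWarning.
arr = arr.set_values([0, 2], 0.0)
```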

def to_dense(self, fill=None):
"""
Convert SparseArray to a NumPy array.
@@ -544,6 +592,10 @@ def astype(self, dtype=None, copy=True):
return self._simple_new(sp_values, self.sp_index,
fill_value=fill_value)

+    def tolist(self):
+        """Return *dense* self as list"""
+        return self.values.tolist()
Review comment (Contributor): test for this?

Reply (Author): Added.
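The added test might look roughly like this (a sketch in the style of the pandas sparse tests; names are illustrative):

```python
import numpy as np
from pandas import SparseArray

def test_tolist():
    arr = SparseArray([1.0, np.nan, 3.0])
    # tolist densifies first, so fill values come back as well.
    result = arr.tolist()
    assert result[0] == 1.0
    assert np.isnan(result[1])
    assert result[2] == 3.0
```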


def copy(self, deep=True):
"""
Make a copy of the SparseArray. Only the actual sparse values need to
66 changes: 18 additions & 48 deletions pandas/core/sparse/frame.py
@@ -330,10 +330,11 @@ def _apply_columns(self, func):

return self._constructor(
data=new_data, index=self.index, columns=self.columns,
-            default_fill_value=self.default_fill_value).__finalize__(self)
+            default_fill_value=self.default_fill_value,
+            default_kind=self.default_kind).__finalize__(self)

-    def astype(self, dtype):
-        return self._apply_columns(lambda x: x.astype(dtype))
+    def astype(self, dtype, **kwargs):
+        return self._apply_columns(lambda x: x.astype(dtype, **kwargs))

def copy(self, deep=True):
"""
@@ -464,44 +465,6 @@ def _get_value(self, index, col, takeable=False):
return series._get_value(index, takeable=takeable)
_get_value.__doc__ = get_value.__doc__

-    def set_value(self, index, col, value, takeable=False):
-        """
-        Put single value at passed column and index
-
-        .. deprecated:: 0.21.0
-
-        Please use .at[] or .iat[] accessors.
-
-        Parameters
-        ----------
-        index : row label
-        col : column label
-        value : scalar value
-        takeable : interpret the index/col as indexers, default False
-
-        Notes
-        -----
-        This method *always* returns a new object. It is currently not
-        particularly efficient (and potentially very expensive) but is provided
-        for API compatibility with DataFrame
-
-        Returns
-        -------
-        frame : DataFrame
-        """
-        warnings.warn("set_value is deprecated and will be removed "
-                      "in a future release. Please use "
-                      ".at[] or .iat[] accessors instead", FutureWarning,
-                      stacklevel=2)
-        return self._set_value(index, col, value, takeable=takeable)
-
-    def _set_value(self, index, col, value, takeable=False):
-        dense = self.to_dense()._set_value(
-            index, col, value, takeable=takeable)
-        return dense.to_sparse(kind=self._default_kind,
-                               fill_value=self._default_fill_value)
-    _set_value.__doc__ = set_value.__doc__

def _slice(self, slobj, axis=0, kind=None):
if axis == 0:
new_index = self.index[slobj]
@@ -576,7 +539,8 @@ def _combine_frame(self, other, func, fill_value=None, level=None):

return self._constructor(data=new_data, index=new_index,
columns=new_columns,
-                                 default_fill_value=new_fill_value
+                                 default_fill_value=new_fill_value,
+                                 default_kind=self.default_kind,
).__finalize__(self)

def _combine_match_index(self, other, func, level=None):
@@ -605,7 +569,8 @@ def _combine_match_index(self, other, func, level=None):

return self._constructor(
new_data, index=new_index, columns=self.columns,
-            default_fill_value=fill_value).__finalize__(self)
+            default_fill_value=fill_value,
+            default_kind=self.default_kind).__finalize__(self)

def _combine_match_columns(self, other, func, level=None, try_cast=True):
# patched version of DataFrame._combine_match_columns to account for
@@ -629,7 +594,8 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):

return self._constructor(
new_data, index=self.index, columns=union,
-            default_fill_value=self.default_fill_value).__finalize__(self)
+            default_fill_value=self.default_fill_value,
+            default_kind=self.default_kind).__finalize__(self)

def _combine_const(self, other, func, errors='raise', try_cast=True):
return self._apply_columns(lambda x: func(x, other))
@@ -673,7 +639,8 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,

return self._constructor(
new_series, index=index, columns=self.columns,
-            default_fill_value=self._default_fill_value).__finalize__(self)
+            default_fill_value=self._default_fill_value,
+            default_kind=self.default_kind).__finalize__(self)

def _reindex_columns(self, columns, method, copy, level, fill_value=None,
limit=None, takeable=False):
@@ -693,7 +660,8 @@ def _reindex_columns(self, columns, method, copy, level, fill_value=None,
sdict = {k: v for k, v in compat.iteritems(self) if k in columns}
return self._constructor(
sdict, index=self.index, columns=columns,
-            default_fill_value=self._default_fill_value).__finalize__(self)
+            default_fill_value=self._default_fill_value,
+            default_kind=self.default_kind).__finalize__(self)

def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
limit=None, copy=False, allow_dups=False):
@@ -725,8 +693,10 @@ def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
else:
new_arrays[col] = self[col]

-        return self._constructor(new_arrays, index=index,
-                                 columns=columns).__finalize__(self)
+        return self._constructor(
+            new_arrays, index=index, columns=columns,
+            default_fill_value=self.default_fill_value,
+            default_kind=self.default_kind).__finalize__(self)

def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
sort=False):
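The constructor calls in this file now thread ``default_kind`` (and, where it was missing, ``default_fill_value``) through to the result, so derived frames keep the parent's sparse representation. A quick check of that intent (hypothetical):

```python
import pandas as pd

sdf = pd.SparseDataFrame({'a': [1.0, 0.0]},
                         default_fill_value=0.0,
                         default_kind='integer')

# Rebuilt frames should preserve both defaults.
out = sdf.astype(float)
assert out.default_kind == sdf.default_kind
assert out.default_fill_value == sdf.default_fill_value
```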