Skip to content

DEPR: deprecate SparseArray.values #26421

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,10 @@ Deprecations
~~~~~~~~~~~~

- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version.
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).


.. _whatsnew_0250.prior_deprecations:
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,16 @@ def npoints(self):
def values(self):
"""
Dense values

.. deprecated:: 0.25.0

Use ``np.asarray(...)`` or the ``.to_dense()`` method instead.
"""
msg = (
"The 'values' attribute of a SparseArray is deprecated and will "
"be removed in a future version. You can use `np.asarray(...)` or "
"the `.to_dense()` method instead.")
warnings.warn(msg, FutureWarning, stacklevel=2)
return self.to_dense()

def isna(self):
Expand Down Expand Up @@ -1076,7 +1085,7 @@ def __getitem__(self, key):
if is_integer(key):
return self._get_val_at(key)
elif isinstance(key, tuple):
data_slice = self.values[key]
data_slice = self.to_dense()[key]
elif isinstance(key, slice):
# special case to preserve dtypes
if key == slice(None):
Expand Down Expand Up @@ -1635,7 +1644,7 @@ def __array_wrap__(self, array, context=None):
from pandas.core.dtypes.generic import ABCSparseSeries

ufunc, inputs, _ = context
inputs = tuple(x.values if isinstance(x, ABCSparseSeries) else x
inputs = tuple(x.to_dense() if isinstance(x, ABCSparseSeries) else x
for x in inputs)
return self.__array_ufunc__(ufunc, '__call__', *inputs)

Expand Down Expand Up @@ -1854,7 +1863,7 @@ def _maybe_to_sparse(array):
array must be SparseSeries or SparseArray
"""
if isinstance(array, ABCSparseSeries):
array = array.values.copy()
array = array.array.copy()
return array


Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,8 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
# with a .values attribute.
aligned_args = {k: kwargs[k]
for k in align_keys
if hasattr(kwargs[k], 'values') and
not isinstance(kwargs[k], ABCExtensionArray)}
if not isinstance(kwargs[k], ABCExtensionArray) and
hasattr(kwargs[k], 'values')}

for b in self.blocks:
if filter is not None:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2272,10 +2272,10 @@ def _cast_sparse_series_op(left, right, opname):
# TODO: This should be moved to the array?
if is_integer_dtype(left) and is_integer_dtype(right):
# series coerces to float64 if result should have NaN/inf
if opname in ('floordiv', 'mod') and (right.values == 0).any():
if opname in ('floordiv', 'mod') and (right.to_dense() == 0).any():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we not generally be using np.asarray rather than .to_dense()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are equivalent (although to_dense actually does a bit less, as it specifies the dtype whereas asarray does some inference; I'm not sure about that difference, though).

left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))
elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any():
elif opname in ('rfloordiv', 'rmod') and (left.to_dense() == 0).any():
left = left.astype(SparseDtype(np.float64, left.fill_value))
right = right.astype(SparseDtype(np.float64, right.fill_value))

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
# .take returns SparseArray
new = values.take(indexer)
if need_mask:
new = new.values
new = new.to_dense()
# convert integer to float if necessary. need to do a lot
# more than that, handle boolean etc also
new, fill_value = maybe_upcast(new, fill_value=fill_value)
Expand Down
44 changes: 26 additions & 18 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,9 +433,9 @@ def test_constructor_bool(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([2, 3], np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)

def test_constructor_bool_fill_value(self):
arr = SparseArray([True, False, True], dtype=None)
Expand Down Expand Up @@ -463,9 +463,9 @@ def test_constructor_float32(self):
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([0, 2], dtype=np.int32))

for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)
dense = arr.to_dense()
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)

def test_astype(self):
# float -> float
Expand Down Expand Up @@ -514,7 +514,7 @@ def test_astype_all(self, any_real_dtype):
assert res.dtype == SparseDtype(typ, 1)
assert res.sp_values.dtype == typ

tm.assert_numpy_array_equal(np.asarray(res.values),
tm.assert_numpy_array_equal(np.asarray(res.to_dense()),
vals.astype(typ))

@pytest.mark.parametrize('array, dtype, expected', [
Expand Down Expand Up @@ -596,7 +596,6 @@ def test_copy_shallow(self):
assert arr2.sp_index is self.arr.sp_index

def test_values_asarray(self):
assert_almost_equal(self.arr.values, self.arr_data)
assert_almost_equal(self.arr.to_dense(), self.arr_data)

@pytest.mark.parametrize('data,shape,dtype', [
Expand Down Expand Up @@ -627,7 +626,7 @@ def test_dense_repr(self, vals, fill_value, method):

def test_getitem(self):
def _checkit(i):
assert_almost_equal(self.arr[i], self.arr.values[i])
assert_almost_equal(self.arr[i], self.arr.to_dense()[i])

for i in range(len(self.arr)):
_checkit(i)
Expand All @@ -641,11 +640,11 @@ def test_getitem_arraylike_mask(self):

def test_getslice(self):
result = self.arr[:-3]
exp = SparseArray(self.arr.values[:-3])
exp = SparseArray(self.arr.to_dense()[:-3])
tm.assert_sp_array_equal(result, exp)

result = self.arr[-4:]
exp = SparseArray(self.arr.values[-4:])
exp = SparseArray(self.arr.to_dense()[-4:])
tm.assert_sp_array_equal(result, exp)

# two corner cases from Series
Expand All @@ -654,7 +653,7 @@ def test_getslice(self):
tm.assert_sp_array_equal(result, exp)

result = self.arr[:-12]
exp = SparseArray(self.arr.values[:0])
exp = SparseArray(self.arr.to_dense()[:0])
tm.assert_sp_array_equal(result, exp)

def test_getslice_tuple(self):
Expand Down Expand Up @@ -702,16 +701,16 @@ def test_binary_operators(self, op):

def _check_op(op, first, second):
res = op(first, second)
exp = SparseArray(op(first.values, second.values),
exp = SparseArray(op(first.to_dense(), second.to_dense()),
fill_value=first.fill_value)
assert isinstance(res, SparseArray)
assert_almost_equal(res.values, exp.values)
assert_almost_equal(res.to_dense(), exp.to_dense())

res2 = op(first, second.values)
res2 = op(first, second.to_dense())
assert isinstance(res2, SparseArray)
tm.assert_sp_array_equal(res, res2)

res3 = op(first.values, second)
res3 = op(first.to_dense(), second)
assert isinstance(res3, SparseArray)
tm.assert_sp_array_equal(res, res3)

Expand All @@ -720,13 +719,13 @@ def _check_op(op, first, second):

# Ignore this if the actual op raises (e.g. pow).
try:
exp = op(first.values, 4)
exp = op(first.to_dense(), 4)
exp_fv = op(first.fill_value, 4)
except ValueError:
pass
else:
assert_almost_equal(res4.fill_value, exp_fv)
assert_almost_equal(res4.values, exp)
assert_almost_equal(res4.to_dense(), exp)

with np.errstate(all="ignore"):
for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]:
Expand Down Expand Up @@ -1230,3 +1229,12 @@ def test_map_missing():

result = arr.map({0: 10, 1: 11})
tm.assert_sp_array_equal(result, expected)


def test_deprecated_values():
arr = SparseArray([0, 1, 2])

with tm.assert_produces_warning(FutureWarning):
result = arr.values

tm.assert_numpy_array_equal(result, arr.to_dense())
6 changes: 3 additions & 3 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def test_constructor(self):
assert isinstance(self.iseries.sp_index, IntIndex)

assert self.zbseries.fill_value == 0
tm.assert_numpy_array_equal(self.zbseries.values.values,
tm.assert_numpy_array_equal(self.zbseries.values.to_dense(),
self.bseries.to_dense().fillna(0).values)

# pass SparseSeries
Expand Down Expand Up @@ -322,7 +322,7 @@ def test_constructor_ndarray(self):
def test_constructor_nonnan(self):
arr = [0, 0, 0, nan, nan]
sp_series = SparseSeries(arr, fill_value=0)
tm.assert_numpy_array_equal(sp_series.values.values, np.array(arr))
tm.assert_numpy_array_equal(sp_series.values.to_dense(), np.array(arr))
assert len(sp_series) == 5
assert sp_series.shape == (5, )

Expand Down Expand Up @@ -514,7 +514,7 @@ def _compare(idx):
sparse_result = sp.take(idx)
assert isinstance(sparse_result, SparseSeries)
tm.assert_almost_equal(dense_result,
sparse_result.values.values)
sparse_result.values.to_dense())

_compare([1., 2., 3., 4., 5., 0.])
_compare([7, 2, 9, 0, 4])
Expand Down
2 changes: 1 addition & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1403,7 +1403,7 @@ def assert_sp_array_equal(left, right, check_dtype=True, check_kind=True,
assert_attr_equal('fill_value', left, right)
if check_dtype:
assert_attr_equal('dtype', left, right)
assert_numpy_array_equal(left.values, right.values,
assert_numpy_array_equal(left.to_dense(), right.to_dense(),
check_dtype=check_dtype)


Expand Down