Skip to content

Commit e7ad884

Browse files
TomAugspurger authored and jreback committed
Deprecate SparseDataFrame and SparseSeries (pandas-dev#26137)
1 parent 7629a18 commit e7ad884

40 files changed

+488
-175
lines changed

doc/source/user_guide/sparse.rst

+204-122
Large diffs are not rendered by default.

doc/source/whatsnew/v0.25.0.rst

+26-1
Original file line numberDiff line numberDiff line change
@@ -299,14 +299,39 @@ Other API Changes
299299
Deprecations
300300
~~~~~~~~~~~~
301301

302+
Sparse Subclasses
303+
^^^^^^^^^^^^^^^^^
304+
305+
The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. Their functionality is better provided
306+
by a ``Series`` or ``DataFrame`` with sparse values.
307+
308+
**Previous Way**
309+
310+
.. ipython:: python
311+
:okwarning:
312+
313+
df = pd.SparseDataFrame({"A": [0, 0, 1, 2]})
314+
df.dtypes
315+
316+
**New Way**
317+
318+
.. ipython:: python
319+
320+
df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])})
321+
df.dtypes
322+
323+
The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`).
324+
325+
Other Deprecations
326+
^^^^^^^^^^^^^^^^^^
327+
302328
- The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`).
303329
- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
304330
- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
305331
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
306332
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)
307333
- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`).
308334

309-
310335
.. _whatsnew_0250.prior_deprecations:
311336

312337
Removal of prior version deprecations/changes

pandas/core/arrays/sparse.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2014,9 +2014,9 @@ def from_coo(cls, A, dense_index=False):
20142014
from pandas.core.sparse.scipy_sparse import _coo_to_sparse_series
20152015
from pandas import Series
20162016

2017-
result = _coo_to_sparse_series(A, dense_index=dense_index)
2018-
# SparseSeries -> Series[sparse]
2019-
result = Series(result.values, index=result.index, copy=False)
2017+
result = _coo_to_sparse_series(A, dense_index=dense_index,
2018+
sparse_series=False)
2019+
result = Series(result.array, index=result.index, copy=False)
20202020

20212021
return result
20222022

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1930,13 +1930,13 @@ def to_sparse(self, fill_value=None, kind='block'):
19301930
>>> type(df)
19311931
<class 'pandas.core.frame.DataFrame'>
19321932
1933-
>>> sdf = df.to_sparse()
1934-
>>> sdf
1933+
>>> sdf = df.to_sparse() # doctest: +SKIP
1934+
>>> sdf # doctest: +SKIP
19351935
0 1
19361936
0 NaN NaN
19371937
1 1.0 NaN
19381938
2 NaN 1.0
1939-
>>> type(sdf)
1939+
>>> type(sdf) # doctest: +SKIP
19401940
<class 'pandas.core.sparse.frame.SparseDataFrame'>
19411941
"""
19421942
from pandas.core.sparse.api import SparseDataFrame

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5589,7 +5589,7 @@ def ftypes(self):
55895589
3 float64:dense
55905590
dtype: object
55915591
5592-
>>> pd.SparseDataFrame(arr).ftypes
5592+
>>> pd.SparseDataFrame(arr).ftypes # doctest: +SKIP
55935593
0 float64:sparse
55945594
1 float64:sparse
55955595
2 float64:sparse

pandas/core/series.py

-1
Original file line numberDiff line numberDiff line change
@@ -1586,7 +1586,6 @@ def to_sparse(self, kind='block', fill_value=None):
15861586
SparseSeries
15871587
Sparse representation of the Series.
15881588
"""
1589-
# TODO: deprecate
15901589
from pandas.core.sparse.series import SparseSeries
15911590

15921591
values = SparseArray(self, kind=kind, fill_value=fill_value)

pandas/core/sparse/frame.py

+12
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,24 @@
2828
from pandas.core.sparse.series import SparseSeries
2929

3030
_shared_doc_kwargs = dict(klass='SparseDataFrame')
31+
depr_msg = """\
32+
SparseDataFrame is deprecated and will be removed in a future version.
33+
Use a regular DataFrame whose columns are SparseArrays instead.
34+
35+
See http://pandas.pydata.org/pandas-docs/stable/\
36+
user_guide/sparse.html#migrating for more.
37+
"""
3138

3239

3340
class SparseDataFrame(DataFrame):
3441
"""
3542
DataFrame containing sparse floating point data in the form of SparseSeries
3643
objects
3744
45+
.. deprecated:: 0.25.0
46+
47+
Use a DataFrame with sparse values instead.
48+
3849
Parameters
3950
----------
4051
data : same types as can be passed to DataFrame or scipy.sparse.spmatrix
@@ -56,6 +67,7 @@ class SparseDataFrame(DataFrame):
5667
def __init__(self, data=None, index=None, columns=None, default_kind=None,
5768
default_fill_value=None, dtype=None, copy=False):
5869

70+
warnings.warn(depr_msg, FutureWarning, stacklevel=2)
5971
# pick up the defaults from the Sparse structures
6072
if isinstance(data, SparseDataFrame):
6173
if index is None:

pandas/core/sparse/scipy_sparse.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,32 @@ def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ),
116116
return sparse_matrix, rows, columns
117117

118118

119-
def _coo_to_sparse_series(A, dense_index=False):
119+
def _coo_to_sparse_series(A, dense_index: bool = False,
120+
sparse_series: bool = True):
120121
"""
121122
Convert a scipy.sparse.coo_matrix to a SparseSeries or a Series with sparse values.
122-
Use the defaults given in the SparseSeries constructor.
123+
124+
Parameters
125+
----------
126+
A : scipy.sparse.coo.coo_matrix
127+
dense_index : bool, default False
128+
sparse_series : bool, default True
129+
130+
Returns
131+
-------
132+
Series or SparseSeries
123133
"""
134+
from pandas import SparseDtype
135+
124136
s = Series(A.data, MultiIndex.from_arrays((A.row, A.col)))
125137
s = s.sort_index()
126-
s = s.to_sparse() # TODO: specify kind?
138+
if sparse_series:
139+
# TODO(SparseSeries): remove this and the sparse_series keyword.
140+
# This is just here to avoid a FutureWarning when
141+
# _coo_to_sparse_series is called via Series.sparse.from_coo
142+
s = s.to_sparse() # TODO: specify kind?
143+
else:
144+
s = s.astype(SparseDtype(s.dtype))
127145
if dense_index:
128146
# is there a better constructor method to use here?
129147
i = range(A.shape[0])

pandas/core/sparse/series.py

+16
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,24 @@
3232
optional_labels='', optional_axis='')
3333

3434

35+
depr_msg = """\
36+
SparseSeries is deprecated and will be removed in a future version.
37+
Use a Series with sparse values instead.
38+
39+
>>> series = pd.Series(pd.SparseArray(...))
40+
41+
See http://pandas.pydata.org/pandas-docs/stable/\
42+
user_guide/sparse.html#migrating for more.
43+
"""
44+
45+
3546
class SparseSeries(Series):
3647
"""Data structure for labeled, sparse floating point data
3748
49+
.. deprecated:: 0.25.0
50+
51+
Use a Series with sparse values instead.
52+
3853
Parameters
3954
----------
4055
data : {array-like, Series, SparseSeries, dict}
@@ -60,6 +75,7 @@ class SparseSeries(Series):
6075
def __init__(self, data=None, index=None, sparse_index=None, kind='block',
6176
fill_value=None, name=None, dtype=None, copy=False,
6277
fastpath=False):
78+
warnings.warn(depr_msg, FutureWarning, stacklevel=2)
6379
# TODO: Most of this should be refactored and shared with Series
6480
# 1. BlockManager -> array
6581
# 2. Series.index, Series.name, index, name reconciliation

pandas/tests/arrays/sparse/test_accessor.py

+18
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,21 @@ def test_density(self):
101101
res = df.sparse.density
102102
expected = 0.75
103103
assert res == expected
104+
105+
@pytest.mark.parametrize("dtype", ['int64', 'float64'])
106+
@pytest.mark.parametrize("dense_index", [True, False])
107+
@td.skip_if_no_scipy
108+
def test_series_from_coo(self, dtype, dense_index):
109+
import scipy.sparse
110+
111+
A = scipy.sparse.eye(3, format='coo', dtype=dtype)
112+
result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
113+
index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
114+
expected = pd.Series(pd.SparseArray(np.array([1, 1, 1], dtype=dtype)),
115+
index=index)
116+
if dense_index:
117+
expected = expected.reindex(
118+
pd.MultiIndex.from_product(index.levels)
119+
)
120+
121+
tm.assert_series_equal(result, expected)

pandas/tests/arrays/sparse/test_arithmetics.py

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pandas.util.testing as tm
99

1010

11+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
1112
class TestSparseArrayArithmetics:
1213

1314
_base = np.array

pandas/tests/arrays/sparse/test_array.py

+3
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ def test_scalar_with_index_infer_dtype(self, scalar, dtype):
215215
assert exp.dtype == dtype
216216

217217
@pytest.mark.parametrize("fill", [1, np.nan, 0])
218+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
218219
def test_sparse_series_round_trip(self, kind, fill):
219220
# see gh-13999
220221
arr = SparseArray([np.nan, 1, np.nan, 2, 3],
@@ -231,6 +232,7 @@ def test_sparse_series_round_trip(self, kind, fill):
231232
tm.assert_sp_array_equal(arr, res)
232233

233234
@pytest.mark.parametrize("fill", [True, False, np.nan])
235+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
234236
def test_sparse_series_round_trip2(self, kind, fill):
235237
# see gh-13999
236238
arr = SparseArray([True, False, True, True], dtype=np.bool,
@@ -1098,6 +1100,7 @@ def test_npoints(self):
10981100
assert arr.npoints == 1
10991101

11001102

1103+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
11011104
class TestAccessor:
11021105

11031106
@pytest.mark.parametrize('attr', [

pandas/tests/dtypes/test_common.py

+24-4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
from pandas.core.sparse.api import SparseDtype
1616
import pandas.util.testing as tm
1717

18+
ignore_sparse_warning = pytest.mark.filterwarnings(
19+
"ignore:Sparse:FutureWarning"
20+
)
21+
1822

1923
# EA & Actual Dtypes
2024
def to_ea_dtypes(dtypes):
@@ -146,6 +150,7 @@ def test_is_object():
146150
@pytest.mark.parametrize("check_scipy", [
147151
False, pytest.param(True, marks=td.skip_if_no_scipy)
148152
])
153+
@ignore_sparse_warning
149154
def test_is_sparse(check_scipy):
150155
assert com.is_sparse(pd.SparseArray([1, 2, 3]))
151156
assert com.is_sparse(pd.SparseSeries([1, 2, 3]))
@@ -158,6 +163,7 @@ def test_is_sparse(check_scipy):
158163

159164

160165
@td.skip_if_no_scipy
166+
@ignore_sparse_warning
161167
def test_is_scipy_sparse():
162168
from scipy.sparse import bsr_matrix
163169
assert com.is_scipy_sparse(bsr_matrix([1, 2, 3]))
@@ -529,6 +535,7 @@ def test_is_bool_dtype():
529535
@pytest.mark.parametrize("check_scipy", [
530536
False, pytest.param(True, marks=td.skip_if_no_scipy)
531537
])
538+
@ignore_sparse_warning
532539
def test_is_extension_type(check_scipy):
533540
assert not com.is_extension_type([1, 2, 3])
534541
assert not com.is_extension_type(np.array([1, 2, 3]))
@@ -595,8 +602,6 @@ def test_is_offsetlike():
595602
(pd.DatetimeIndex([1, 2]).dtype, np.dtype('=M8[ns]')),
596603
('<M8[ns]', np.dtype('<M8[ns]')),
597604
('datetime64[ns, Europe/London]', DatetimeTZDtype('ns', 'Europe/London')),
598-
(pd.SparseSeries([1, 2], dtype='int32'), SparseDtype('int32')),
599-
(pd.SparseSeries([1, 2], dtype='int32').dtype, SparseDtype('int32')),
600605
(PeriodDtype(freq='D'), PeriodDtype(freq='D')),
601606
('period[D]', PeriodDtype(freq='D')),
602607
(IntervalDtype(), IntervalDtype()),
@@ -605,6 +610,14 @@ def test__get_dtype(input_param, result):
605610
assert com._get_dtype(input_param) == result
606611

607612

613+
@ignore_sparse_warning
614+
def test__get_dtype_sparse():
615+
ser = pd.SparseSeries([1, 2], dtype='int32')
616+
expected = SparseDtype('int32')
617+
assert com._get_dtype(ser) == expected
618+
assert com._get_dtype(ser.dtype) == expected
619+
620+
608621
@pytest.mark.parametrize('input_param,expected_error_message', [
609622
(None, "Cannot deduce dtype from null object"),
610623
(1, "data type not understood"),
@@ -640,8 +653,7 @@ def test__get_dtype_fails(input_param, expected_error_message):
640653
(pd.DatetimeIndex(['2000'], tz='Europe/London').dtype,
641654
pd.Timestamp),
642655
('datetime64[ns, Europe/London]', pd.Timestamp),
643-
(pd.SparseSeries([1, 2], dtype='int32'), np.int32),
644-
(pd.SparseSeries([1, 2], dtype='int32').dtype, np.int32),
656+
645657
(PeriodDtype(freq='D'), pd.Period),
646658
('period[D]', pd.Period),
647659
(IntervalDtype(), pd.Interval),
@@ -652,3 +664,11 @@ def test__get_dtype_fails(input_param, expected_error_message):
652664
])
653665
def test__is_dtype_type(input_param, result):
654666
assert com._is_dtype_type(input_param, lambda tipo: tipo == result)
667+
668+
669+
@ignore_sparse_warning
670+
def test__is_dtype_type_sparse():
671+
ser = pd.SparseSeries([1, 2], dtype='int32')
672+
result = np.dtype('int32')
673+
assert com._is_dtype_type(ser, lambda tipo: tipo == result)
674+
assert com._is_dtype_type(ser.dtype, lambda tipo: tipo == result)

pandas/tests/dtypes/test_dtypes.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -870,7 +870,6 @@ def test_registry_find(dtype, expected):
870870
(pd.Series([1, 2]), False),
871871
(np.array([True, False]), True),
872872
(pd.Series([True, False]), True),
873-
(pd.SparseSeries([True, False]), True),
874873
(pd.SparseArray([True, False]), True),
875874
(SparseDtype(bool), True)
876875
])
@@ -879,6 +878,12 @@ def test_is_bool_dtype(dtype, expected):
879878
assert result is expected
880879

881880

881+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
882+
def test_is_bool_dtype_sparse():
883+
result = is_bool_dtype(pd.SparseSeries([True, False]))
884+
assert result is True
885+
886+
882887
@pytest.mark.parametrize("check", [
883888
is_categorical_dtype,
884889
is_datetime64tz_dtype,

pandas/tests/dtypes/test_generic.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from warnings import catch_warnings
1+
from warnings import catch_warnings, simplefilter
22

33
import numpy as np
44

@@ -17,9 +17,12 @@ class TestABCClasses:
1717
categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1])
1818
categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical)
1919
df = pd.DataFrame({'names': ['a', 'b', 'c']}, index=multi_index)
20-
sparse_series = pd.Series([1, 2, 3]).to_sparse()
20+
with catch_warnings():
21+
simplefilter('ignore', FutureWarning)
22+
sparse_series = pd.Series([1, 2, 3]).to_sparse()
23+
sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]})
24+
2125
sparse_array = pd.SparseArray(np.random.randn(10))
22-
sparse_frame = pd.SparseDataFrame({'a': [1, -1, None]})
2326
datetime_array = pd.core.arrays.DatetimeArray(datetime_index)
2427
timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index)
2528

pandas/tests/frame/test_alter_axes.py

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import pandas.util.testing as tm
1414

1515

16+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
1617
class TestDataFrameAlterAxes:
1718

1819
def test_set_index_directly(self, float_string_frame):
@@ -1376,6 +1377,7 @@ def test_droplevel(self):
13761377
tm.assert_frame_equal(result, expected)
13771378

13781379

1380+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
13791381
class TestIntervalIndex:
13801382

13811383
def test_setitem(self):

pandas/tests/frame/test_indexing.py

+1
Original file line numberDiff line numberDiff line change
@@ -2073,6 +2073,7 @@ def test_loc_duplicates(self):
20732073
df.loc[trange[bool_idx], "A"] += 6
20742074
tm.assert_frame_equal(df, expected)
20752075

2076+
@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
20762077
def test_iloc_sparse_propegate_fill_value(self):
20772078
from pandas.core.sparse.api import SparseDataFrame
20782079
df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)

0 commit comments

Comments
 (0)