Skip to content

Commit f662e0e

Browse files
jbrockmendel authored and noatamir committed
DEPR: store SparseArray directly in Index (pandas-dev#49307)
1 parent 1b7ab79 commit f662e0e

File tree

6 files changed: +10 -55 lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ Removal of prior version deprecations/changes
229229
- Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`)
230230
- Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`)
231231
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
232+
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
232233

233234
.. ---------------------------------------------------------------------------
234235
.. _whatsnew_200.performance:

pandas/core/indexes/base.py

-12
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,6 @@
156156
tz_to_dtype,
157157
validate_tz_from_dtype,
158158
)
159-
from pandas.core.arrays.sparse import SparseDtype
160159
from pandas.core.arrays.string_ import StringArray
161160
from pandas.core.base import (
162161
IndexOpsMixin,
@@ -618,17 +617,6 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
618617

619618
return PeriodIndex
620619

621-
elif isinstance(dtype, SparseDtype):
622-
warnings.warn(
623-
"In a future version, passing a SparseArray to pd.Index "
624-
"will store that array directly instead of converting to a "
625-
"dense numpy ndarray. To retain the old behavior, use "
626-
"pd.Index(arr.to_numpy()) instead",
627-
FutureWarning,
628-
stacklevel=find_stack_level(),
629-
)
630-
return cls._dtype_to_subclass(dtype.subtype)
631-
632620
return Index
633621

634622
if dtype.kind == "M":

pandas/tests/base/test_conversion.py

+1 -4
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,7 @@ def test_array_multiindex_raises():
334334
def test_to_numpy(arr, expected, index_or_series_or_array, request):
335335
box = index_or_series_or_array
336336

337-
warn = None
338-
if index_or_series_or_array is pd.Index and isinstance(arr, SparseArray):
339-
warn = FutureWarning
340-
with tm.assert_produces_warning(warn):
337+
with tm.assert_produces_warning(None):
341338
thing = box(arr)
342339

343340
if arr.dtype.name == "int64" and box is pd.array:

pandas/tests/extension/test_sparse.py

-11
Original file line numberDiff line numberDiff line change
@@ -211,17 +211,6 @@ def test_reindex(self, data, na_value):
211211

212212

213213
class TestIndex(base.BaseIndexTests):
214-
def test_index_from_array(self, data):
215-
msg = "will store that array directly"
216-
with tm.assert_produces_warning(FutureWarning, match=msg):
217-
idx = pd.Index(data)
218-
219-
if data.dtype.subtype == "f":
220-
assert idx.dtype == np.float64
221-
elif data.dtype.subtype == "i":
222-
assert idx.dtype == np.int64
223-
else:
224-
assert idx.dtype == data.dtype.subtype
225214

226215
# TODO(2.0): should pass once SparseArray is stored directly in Index.
227216
@pytest.mark.xfail(reason="Index cannot yet store sparse dtype")

pandas/tests/indexes/datetimes/test_constructors.py

+3 -5
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,9 @@ def test_constructor_from_sparse_array(self):
142142
Timestamp("2016-05-01T01:00:00.000000"),
143143
]
144144
arr = pd.arrays.SparseArray(values)
145-
msg = "will store that array directly"
146-
with tm.assert_produces_warning(FutureWarning, match=msg):
147-
result = Index(arr)
148-
expected = DatetimeIndex(values)
149-
tm.assert_index_equal(result, expected)
145+
result = Index(arr)
146+
assert type(result) is Index
147+
assert result.dtype == arr.dtype
150148

151149
def test_construction_caching(self):
152150

pandas/tests/series/test_ufunc.py

+5 -23
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas.core.dtypes.common import is_dtype_equal
9-
108
import pandas as pd
119
import pandas._testing as tm
1210
from pandas.arrays import SparseArray
@@ -277,14 +275,10 @@ def test_multiply(self, values_for_np_reduce, box_with_array, request):
277275
box = box_with_array
278276
values = values_for_np_reduce
279277

280-
warn = None
281-
if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index:
282-
warn = FutureWarning
283-
msg = "passing a SparseArray to pd.Index"
284-
with tm.assert_produces_warning(warn, match=msg):
278+
with tm.assert_produces_warning(None):
285279
obj = box(values)
286280

287-
if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index:
281+
if isinstance(values, pd.core.arrays.SparseArray):
288282
mark = pytest.mark.xfail(reason="SparseArray has no 'prod'")
289283
request.node.add_marker(mark)
290284

@@ -316,11 +310,7 @@ def test_add(self, values_for_np_reduce, box_with_array):
316310
box = box_with_array
317311
values = values_for_np_reduce
318312

319-
warn = None
320-
if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index:
321-
warn = FutureWarning
322-
msg = "passing a SparseArray to pd.Index"
323-
with tm.assert_produces_warning(warn, match=msg):
313+
with tm.assert_produces_warning(None):
324314
obj = box(values)
325315

326316
if values.dtype.kind in "miuf":
@@ -355,11 +345,7 @@ def test_max(self, values_for_np_reduce, box_with_array):
355345
# ATM Index casts to object, so we get python ints/floats
356346
same_type = False
357347

358-
warn = None
359-
if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index:
360-
warn = FutureWarning
361-
msg = "passing a SparseArray to pd.Index"
362-
with tm.assert_produces_warning(warn, match=msg):
348+
with tm.assert_produces_warning(None):
363349
obj = box(values)
364350

365351
result = np.maximum.reduce(obj)
@@ -383,11 +369,7 @@ def test_min(self, values_for_np_reduce, box_with_array):
383369
# ATM Index casts to object, so we get python ints/floats
384370
same_type = False
385371

386-
warn = None
387-
if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index:
388-
warn = FutureWarning
389-
msg = "passing a SparseArray to pd.Index"
390-
with tm.assert_produces_warning(warn, match=msg):
372+
with tm.assert_produces_warning(None):
391373
obj = box(values)
392374

393375
result = np.minimum.reduce(obj)

0 commit comments

Comments
 (0)