From 3aeb19183e4c1aae55acf3606357ea4d8d9cb241 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 25 Oct 2022 09:20:05 -0700 Subject: [PATCH] DEPR: store SparseArray directly in Index --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/indexes/base.py | 12 -------- pandas/tests/base/test_conversion.py | 5 +--- pandas/tests/extension/test_sparse.py | 11 -------- .../indexes/datetimes/test_constructors.py | 8 ++---- pandas/tests/series/test_ufunc.py | 28 ++++--------------- 6 files changed, 10 insertions(+), 55 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index a3b6d1dc90fee..89472b8defa3c 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -229,6 +229,7 @@ Removal of prior version deprecations/changes - Removed the ``display.column_space`` option in favor of ``df.to_string(col_space=...)`` (:issue:`47280`) - Removed the deprecated method ``mad`` from pandas classes (:issue:`11787`) - Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`) +- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of converting to a dense ``numpy.ndarray`` (:issue:`43930`) .. --------------------------------------------------------------------------- .. 
_whatsnew_200.performance: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1d3efe8bedd94..2fe61623abfd5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -156,7 +156,6 @@ tz_to_dtype, validate_tz_from_dtype, ) -from pandas.core.arrays.sparse import SparseDtype from pandas.core.arrays.string_ import StringArray from pandas.core.base import ( IndexOpsMixin, @@ -618,17 +617,6 @@ def _dtype_to_subclass(cls, dtype: DtypeObj): return PeriodIndex - elif isinstance(dtype, SparseDtype): - warnings.warn( - "In a future version, passing a SparseArray to pd.Index " - "will store that array directly instead of converting to a " - "dense numpy ndarray. To retain the old behavior, use " - "pd.Index(arr.to_numpy()) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - return cls._dtype_to_subclass(dtype.subtype) - return Index if dtype.kind == "M": diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 599aaae4d3527..703ac6c89fca8 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -334,10 +334,7 @@ def test_array_multiindex_raises(): def test_to_numpy(arr, expected, index_or_series_or_array, request): box = index_or_series_or_array - warn = None - if index_or_series_or_array is pd.Index and isinstance(arr, SparseArray): - warn = FutureWarning - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(None): thing = box(arr) if arr.dtype.name == "int64" and box is pd.array: diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 86a523404ef8b..b1111951d67fa 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -211,17 +211,6 @@ def test_reindex(self, data, na_value): class TestIndex(base.BaseIndexTests): - def test_index_from_array(self, data): - msg = "will store that array directly" - with 
tm.assert_produces_warning(FutureWarning, match=msg): - idx = pd.Index(data) - - if data.dtype.subtype == "f": - assert idx.dtype == np.float64 - elif data.dtype.subtype == "i": - assert idx.dtype == np.int64 - else: - assert idx.dtype == data.dtype.subtype # TODO(2.0): should pass once SparseArray is stored directly in Index. @pytest.mark.xfail(reason="Index cannot yet store sparse dtype") diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 9914f4357cee4..67f323c5afd81 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -142,11 +142,9 @@ def test_constructor_from_sparse_array(self): Timestamp("2016-05-01T01:00:00.000000"), ] arr = pd.arrays.SparseArray(values) - msg = "will store that array directly" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = Index(arr) - expected = DatetimeIndex(values) - tm.assert_index_equal(result, expected) + result = Index(arr) + assert type(result) is Index + assert result.dtype == arr.dtype def test_construction_caching(self): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 924980b62a51b..4e53000059cdc 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_dtype_equal - import pandas as pd import pandas._testing as tm from pandas.arrays import SparseArray @@ -277,14 +275,10 @@ def test_multiply(self, values_for_np_reduce, box_with_array, request): box = box_with_array values = values_for_np_reduce - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) - if isinstance(values, pd.core.arrays.SparseArray) 
and box is not pd.Index: + if isinstance(values, pd.core.arrays.SparseArray): mark = pytest.mark.xfail(reason="SparseArray has no 'prod'") request.node.add_marker(mark) @@ -316,11 +310,7 @@ def test_add(self, values_for_np_reduce, box_with_array): box = box_with_array values = values_for_np_reduce - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) if values.dtype.kind in "miuf": @@ -355,11 +345,7 @@ def test_max(self, values_for_np_reduce, box_with_array): # ATM Index casts to object, so we get python ints/floats same_type = False - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) result = np.maximum.reduce(obj) @@ -383,11 +369,7 @@ def test_min(self, values_for_np_reduce, box_with_array): # ATM Index casts to object, so we get python ints/floats same_type = False - warn = None - if is_dtype_equal(values.dtype, "Sparse[int]") and box is pd.Index: - warn = FutureWarning - msg = "passing a SparseArray to pd.Index" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(None): obj = box(values) result = np.minimum.reduce(obj)