Skip to content

Commit 273f0fe

Browse files
jbrockmendel and phofl
authored and committed
DEPR: SparseArray.astype (pandas-dev#49324)
* DEPR: SparseArray.astype * fix append test * remove no-longer-overridden tests
1 parent 6aba4e8 commit 273f0fe

File tree

8 files changed

+27
-90
lines changed

8 files changed

+27
-90
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ Removal of prior version deprecations/changes
285285
- Removed the deprecated method ``tshift`` from pandas classes (:issue:`11631`)
286286
- Changed behavior of empty data passed into :class:`Series`; the default dtype will be ``object`` instead of ``float64`` (:issue:`29405`)
287287
- Changed the behavior of :func:`to_datetime` with argument "now" with ``utc=False`` to match ``Timestamp("now")`` (:issue:`18705`)
288+
- Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``; it now casts to the exact requested dtype rather than silently using a ``SparseDtype`` (:issue:`34457`)
288289
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
289290
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
290291
- Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)

pandas/core/arrays/sparse/array.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ class ellipsis(Enum):
120120

121121
SparseIndexKind = Literal["integer", "block"]
122122

123+
from pandas.core.dtypes.dtypes import ExtensionDtype
124+
123125
from pandas import Series
124126

125127
else:
@@ -1328,14 +1330,13 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
13281330
future_dtype = pandas_dtype(dtype)
13291331
if not isinstance(future_dtype, SparseDtype):
13301332
# GH#34457
1331-
warnings.warn(
1332-
"The behavior of .astype from SparseDtype to a non-sparse dtype "
1333-
"is deprecated. In a future version, this will return a non-sparse "
1334-
"array with the requested dtype. To retain the old behavior, use "
1335-
"`obj.astype(SparseDtype(dtype))`",
1336-
FutureWarning,
1337-
stacklevel=find_stack_level(),
1338-
)
1333+
if isinstance(future_dtype, np.dtype):
1334+
values = np.array(self)
1335+
return astype_nansafe(values, dtype=future_dtype)
1336+
else:
1337+
dtype = cast(ExtensionDtype, dtype)
1338+
cls = dtype.construct_array_type()
1339+
return cls._from_sequence(self, dtype=dtype, copy=copy)
13391340

13401341
dtype = self.dtype.update_dtype(dtype)
13411342
subtype = pandas_dtype(dtype._subtype_with_str)

pandas/tests/arrays/sparse/test_astype.py

+7-23
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,9 @@ def test_astype(self):
3939

4040
def test_astype_bool(self):
4141
a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
42-
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
43-
result = a.astype(bool)
44-
expected = SparseArray(
45-
[True, False, False, True], dtype=SparseDtype(bool, False)
46-
)
47-
tm.assert_sp_array_equal(result, expected)
42+
result = a.astype(bool)
43+
expected = np.array([1, 0, 0, 1], dtype=bool)
44+
tm.assert_numpy_array_equal(result, expected)
4845

4946
# update fill value
5047
result = a.astype(SparseDtype(bool, False))
@@ -57,12 +54,8 @@ def test_astype_all(self, any_real_numpy_dtype):
5754
vals = np.array([1, 2, 3])
5855
arr = SparseArray(vals, fill_value=1)
5956
typ = np.dtype(any_real_numpy_dtype)
60-
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
61-
res = arr.astype(typ)
62-
assert res.dtype == SparseDtype(typ, 1)
63-
assert res.sp_values.dtype == typ
64-
65-
tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ))
57+
res = arr.astype(typ)
58+
tm.assert_numpy_array_equal(res, vals.astype(any_real_numpy_dtype))
6659

6760
@pytest.mark.parametrize(
6861
"arr, dtype, expected",
@@ -100,22 +93,13 @@ def test_astype_all(self, any_real_numpy_dtype):
10093
],
10194
)
10295
def test_astype_more(self, arr, dtype, expected):
103-
104-
if isinstance(dtype, SparseDtype):
105-
warn = None
106-
else:
107-
warn = FutureWarning
108-
109-
with tm.assert_produces_warning(warn, match="astype from SparseDtype"):
110-
result = arr.astype(dtype)
96+
result = arr.astype(arr.dtype.update_dtype(dtype))
11197
tm.assert_sp_array_equal(result, expected)
11298

11399
def test_astype_nan_raises(self):
114100
arr = SparseArray([1.0, np.nan])
115101
with pytest.raises(ValueError, match="Cannot convert non-finite"):
116-
msg = "astype from SparseDtype"
117-
with tm.assert_produces_warning(FutureWarning, match=msg):
118-
arr.astype(int)
102+
arr.astype(int)
119103

120104
def test_astype_copy_false(self):
121105
# GH#34456 bug caused by using .view instead of .astype in astype_nansafe

pandas/tests/extension/base/reshaping.py

+3-14
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import pytest
55

66
import pandas as pd
7-
import pandas._testing as tm
87
from pandas.api.extensions import ExtensionArray
98
from pandas.core.internals.blocks import EABackedBlock
109
from pandas.tests.extension.base.base import BaseExtensionTests
@@ -319,23 +318,13 @@ def test_unstack(self, data, index, obj):
319318
alt = df.unstack(level=level).droplevel(0, axis=1)
320319
self.assert_frame_equal(result, alt)
321320

322-
if obj == "series":
323-
is_sparse = isinstance(ser.dtype, pd.SparseDtype)
324-
else:
325-
is_sparse = isinstance(ser.dtypes.iat[0], pd.SparseDtype)
326-
warn = None if not is_sparse else FutureWarning
327-
with tm.assert_produces_warning(warn, match="astype from Sparse"):
328-
obj_ser = ser.astype(object)
321+
obj_ser = ser.astype(object)
329322

330323
expected = obj_ser.unstack(level=level, fill_value=data.dtype.na_value)
331-
if obj == "series" and not is_sparse:
332-
# GH#34457 SparseArray.astype(object) gives Sparse[object]
333-
# instead of np.dtype(object)
324+
if obj == "series":
334325
assert (expected.dtypes == object).all()
335326

336-
with tm.assert_produces_warning(warn, match="astype from Sparse"):
337-
result = result.astype(object)
338-
327+
result = result.astype(object)
339328
self.assert_frame_equal(result, expected)
340329

341330
def test_ravel(self, data):

pandas/tests/extension/test_sparse.py

+5-32
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
from pandas.errors import PerformanceWarning
2121

22-
from pandas.core.dtypes.common import is_object_dtype
23-
2422
import pandas as pd
2523
from pandas import SparseDtype
2624
import pandas._testing as tm
@@ -159,10 +157,7 @@ def test_concat_mixed_dtypes(self, data):
159157
],
160158
)
161159
def test_stack(self, data, columns):
162-
with tm.assert_produces_warning(
163-
FutureWarning, check_stacklevel=False, match="astype from Sparse"
164-
):
165-
super().test_stack(data, columns)
160+
super().test_stack(data, columns)
166161

167162
def test_concat_columns(self, data, na_value):
168163
self._check_unsupported(data)
@@ -385,33 +380,11 @@ def test_equals(self, data, na_value, as_series, box):
385380

386381

387382
class TestCasting(BaseSparseTests, base.BaseCastingTests):
388-
def test_astype_object_series(self, all_data):
389-
# Unlike the base class, we do not expect the resulting Block
390-
# to be ObjectBlock / resulting array to be np.dtype("object")
391-
ser = pd.Series(all_data, name="A")
392-
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
393-
result = ser.astype(object)
394-
assert is_object_dtype(result.dtype)
395-
assert is_object_dtype(result._mgr.array.dtype)
396-
397-
def test_astype_object_frame(self, all_data):
398-
# Unlike the base class, we do not expect the resulting Block
399-
# to be ObjectBlock / resulting array to be np.dtype("object")
400-
df = pd.DataFrame({"A": all_data})
401-
402-
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
403-
result = df.astype(object)
404-
assert is_object_dtype(result._mgr.arrays[0].dtype)
405-
406-
# check that we can compare the dtypes
407-
comp = result.dtypes == df.dtypes
408-
assert not comp.any()
409-
410383
def test_astype_str(self, data):
411-
with tm.assert_produces_warning(FutureWarning, match="astype from Sparse"):
412-
result = pd.Series(data[:5]).astype(str)
413-
expected_dtype = SparseDtype(str, str(data.fill_value))
414-
expected = pd.Series([str(x) for x in data[:5]], dtype=expected_dtype)
384+
# pre-2.0 this would give a SparseDtype even if the user asked
385+
# for a non-sparse dtype.
386+
result = pd.Series(data[:5]).astype(str)
387+
expected = pd.Series([str(x) for x in data[:5]], dtype=object)
415388
self.assert_series_equal(result, expected)
416389

417390
@pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype")

pandas/tests/frame/methods/test_append.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -241,12 +241,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
241241
def test_other_dtypes(self, data, dtype, using_array_manager):
242242
df = DataFrame(data, dtype=dtype)
243243

244-
warn = None
245-
if using_array_manager and isinstance(dtype, pd.SparseDtype):
246-
warn = FutureWarning
247-
248-
with tm.assert_produces_warning(warn, match="astype from SparseDtype"):
249-
result = df._append(df.iloc[0]).iloc[-1]
244+
result = df._append(df.iloc[0]).iloc[-1]
250245

251246
expected = Series(data, name=0, dtype=dtype)
252247
tm.assert_series_equal(result, expected)

pandas/tests/indexing/test_loc.py

-4
Original file line numberDiff line numberDiff line change
@@ -1300,10 +1300,6 @@ def test_loc_getitem_time_object(self, frame_or_series):
13001300
@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
13011301
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
13021302
@td.skip_if_no_scipy
1303-
@pytest.mark.filterwarnings(
1304-
# TODO(2.0): remove filtering; note only needed for using_array_manager
1305-
"ignore:The behavior of .astype from SparseDtype.*FutureWarning"
1306-
)
13071303
def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
13081304
import scipy.sparse
13091305

pandas/tests/series/test_ufunc.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,7 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
8585
name = "name" # op(pd.Series, array) preserves the name.
8686
series = pd.Series(a1, name=name)
8787

88-
warn = None if not sparse else FutureWarning
89-
with tm.assert_produces_warning(warn):
90-
other = pd.Index(a2, name=name).astype("int64")
88+
other = pd.Index(a2, name=name).astype("int64")
9189

9290
array_args = (a1, a2)
9391
series_args = (series, other) # ufunc(series, array)

0 commit comments

Comments
 (0)