diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 62d56f684a11d..64c8f06349449 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -423,6 +423,7 @@ Reshaping Sparse ^^^^^^ +- Bug in :class:`SparseDtype` constructor failing to raise ``TypeError`` when given an incompatible ``dtype`` for its subtype, which must be a ``numpy`` dtype (:issue:`53160`) - Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`) - diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index dadd161ceeb38..5747ff807600d 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -91,6 +91,9 @@ def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None: dtype = pandas_dtype(dtype) if is_string_dtype(dtype): dtype = np.dtype("object") + if not isinstance(dtype, np.dtype): + # GH#53160 + raise TypeError("SparseDtype subtype must be a numpy dtype") if fill_value is None: fill_value = na_value_for_dtype(dtype) diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py index 86d69610059b3..d729a31668ade 100644 --- a/pandas/tests/arrays/sparse/test_astype.py +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -3,11 +3,7 @@ from pandas._libs.sparse import IntIndex -from pandas import ( - DataFrame, - Series, - Timestamp, -) +from pandas import Timestamp import pandas._testing as tm from pandas.core.arrays.sparse import ( SparseArray, @@ -135,13 +131,3 @@ def test_astype_dt64_to_int64(self): arr3 = SparseArray(values, dtype=dtype) result3 = arr3.astype("int64") tm.assert_numpy_array_equal(result3, expected) - - -def test_dtype_sparse_with_fill_value_not_present_in_data(): - # GH 49987 - df = DataFrame([["a", 0], ["b", 1], ["b", 2]], columns=["A", "B"]) - result = df["A"].astype(SparseDtype("category", fill_value="c")) - expected = Series( - ["a", "b", "b"], name="A", dtype=SparseDtype("object", fill_value="c") - ) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 58fedbd3e4231..88f8577ded5b0 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -207,3 +207,10 @@ def test_repr(): result = str(SparseDtype(object, fill_value="0")) expected = "Sparse[object, '0']" assert result == expected + + +def test_sparse_dtype_subtype_must_be_numpy_dtype(): + # GH#53160 + msg = "SparseDtype subtype must be a numpy dtype" + with pytest.raises(TypeError, match=msg): + SparseDtype("category", fill_value="c")