pandas-dev · phofl · Apr 22, 2023 · Apr 12, 2023 · Apr 13, 2023 · Apr 13, 2023
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -5226,6 +5226,7 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
 Write to a feather file.
 
 .. ipython:: python
+   :okwarning:
 
    df.to_feather("example.feather")
 
@@ -5355,6 +5356,7 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or
 more columns in the output file. Thus, this code:
 
 .. ipython:: python
+   :okwarning:
 
     df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
     df.to_parquet("test.parquet", engine="pyarrow")
@@ -5371,6 +5373,7 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
 :func:`~pandas.DataFrame.to_parquet`:
 
 .. ipython:: python
+   :okwarning:
 
     df.to_parquet("test.parquet", index=False)
 
@@ -5393,6 +5396,7 @@ Partitioning Parquet files
 Parquet supports partitioning of data based on the values of one or more columns.
 
 .. ipython:: python
+   :okwarning:
 
     df = pd.DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]})
     df.to_parquet(path="test", engine="pyarrow", partition_cols=["a"], compression=None)

diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst
@@ -42,6 +42,7 @@ Suppose our raw dataset on disk has many columns::
 That can be generated by the following code snippet:
 
 .. ipython:: python
+   :okwarning:
 
    import pandas as pd
    import numpy as np
@@ -106,6 +107,7 @@ referred to as "low-cardinality" data). By using more efficient data types, you
 can store larger datasets in memory.
 
 .. ipython:: python
+   :okwarning:
 
    ts = make_timeseries(freq="30S", seed=0)
    ts.to_parquet("timeseries.parquet")
@@ -183,6 +185,7 @@ Suppose we have an even larger "logical dataset" on disk that's a directory of p
 files. Each file in the directory represents a different year of the entire dataset.
 
 .. ipython:: python
+   :okwarning:
 
    import pathlib
 

diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
@@ -905,6 +905,7 @@ As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype:
 **New behavior**:
 
 .. ipython:: python
+   :okwarning:
 
    pi = pd.PeriodIndex(["2016-08-01"], freq="D")
    pi

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -234,6 +234,8 @@ Deprecations
 - Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`)
 - Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
 - Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
+- Deprecated :func:`is_period_dtype`, check ``isinstance(dtype, pd.PeriodDtype)`` instead (:issue:`52642`)
+- Deprecated :func:`is_sparse`, check ``isinstance(dtype, pd.SparseDtype)`` instead (:issue:`52642`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -137,7 +137,10 @@ def pytest_collection_modifyitems(items, config) -> None:
     ignored_doctest_warnings = [
         ("is_int64_dtype", "is_int64_dtype is deprecated"),
         ("is_interval_dtype", "is_interval_dtype is deprecated"),
+        ("is_period_dtype", "is_period_dtype is deprecated"),
         ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
+        ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
+        ("is_sparse", "is_sparse is deprecated"),
         # Docstring divides by zero to show behavior difference
         ("missing.mask_zero_div_zero", "divide by zero encountered"),
         (
@@ -149,7 +152,6 @@ def pytest_collection_modifyitems(items, config) -> None:
             "(Series|DataFrame).bool is now deprecated and will be removed "
             "in future version of pandas",
         ),
-        ("is_categorical_dtype", "is_categorical_dtype is deprecated"),
     ]
 
     for item in items:

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -55,7 +55,6 @@
     is_dtype_equal,
     is_float_dtype,
     is_object_dtype,
-    is_sparse,
     is_string_dtype,
     pandas_dtype,
 )
@@ -68,6 +67,7 @@
 
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import generate_regular_range
+from pandas.core.arrays.sparse.dtype import SparseDtype
 import pandas.core.common as com
 
 from pandas.tseries.frequencies import get_period_alias
@@ -2042,7 +2042,7 @@ def _sequence_to_dt64ns(
     if (
         is_object_dtype(data_dtype)
         or is_string_dtype(data_dtype)
-        or is_sparse(data_dtype)
+        or isinstance(data_dtype, SparseDtype)
     ):
         # TODO: We do not have tests specific to string-dtypes,
         #  also complex or categorical or other extension

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -60,7 +60,6 @@
     is_dtype_equal,
     is_float_dtype,
     is_integer_dtype,
-    is_period_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import PeriodDtype
@@ -173,7 +172,9 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):  # type: ignore[misc]
     _typ = "periodarray"  # ABCPeriodArray
     _internal_fill_value = np.int64(iNaT)
     _recognized_scalars = (Period,)
-    _is_recognized_dtype = is_period_dtype  # check_compatible_with checks freq match
+    _is_recognized_dtype = lambda x: isinstance(
+        x, PeriodDtype
+    )  # check_compatible_with checks freq match
     _infer_matches = ("period",)
 
     @property

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -207,6 +207,12 @@ def is_sparse(arr) -> bool:
 
     Returns `False` if the parameter has more than one dimension.
     """
+    warnings.warn(
+        "is_sparse is deprecated and will be removed in a future "
+        "version. Check `isinstance(dtype, pd.SparseDtype)` instead.",
+        FutureWarning,
+        stacklevel=find_stack_level(),
+    )
     from pandas.core.arrays.sparse import SparseDtype
 
     dtype = getattr(arr, "dtype", arr)
@@ -399,6 +405,12 @@ def is_period_dtype(arr_or_dtype) -> bool:
     >>> is_period_dtype(pd.PeriodIndex([], freq="A"))
     True
     """
+    warnings.warn(
+        "is_period_dtype is deprecated and will be removed in a future version. "
+        "Use `isinstance(dtype, pd.PeriodDtype)` instead",
+        FutureWarning,
+        stacklevel=find_stack_level(),
+    )
     if isinstance(arr_or_dtype, ExtensionDtype):
         # GH#33400 fastpath for dtype object
         return arr_or_dtype.type is Period
@@ -539,7 +551,7 @@ def is_string_dtype(arr_or_dtype) -> bool:
     >>> is_string_dtype(pd.Series([1, 2], dtype=object))
     False
     """
-    if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O":
+    if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
         return is_all_strings(arr_or_dtype)
 
     def condition(dtype) -> bool:
@@ -585,7 +597,7 @@ def is_dtype_equal(source, target) -> bool:
             # GH#38516 ensure we get the same behavior from
             #  is_dtype_equal(CDT, "category") and CDT == "category"
             try:
-                src = get_dtype(source)
+                src = _get_dtype(source)
                 if isinstance(src, ExtensionDtype):
                     return src == target
             except (TypeError, AttributeError, ImportError):
@@ -594,8 +606,8 @@ def is_dtype_equal(source, target) -> bool:
         return is_dtype_equal(target, source)
 
     try:
-        source = get_dtype(source)
-        target = get_dtype(target)
+        source = _get_dtype(source)
+        target = _get_dtype(target)
         return source == target
     except (TypeError, AttributeError, ImportError):
         # invalid comparison
@@ -870,7 +882,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool:
         return False
 
     try:
-        tipo = get_dtype(arr_or_dtype)
+        tipo = _get_dtype(arr_or_dtype)
     except TypeError:
         return False
     return (isinstance(tipo, np.dtype) and tipo.kind == "M") or isinstance(
@@ -918,7 +930,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
     if arr_or_dtype is None:
         return False
     try:
-        tipo = get_dtype(arr_or_dtype)
+        tipo = _get_dtype(arr_or_dtype)
     except TypeError:
         return False
     return tipo == DT64NS_DTYPE or (
@@ -1247,7 +1259,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
     if arr_or_dtype is None:
         return False
     try:
-        dtype = get_dtype(arr_or_dtype)
+        dtype = _get_dtype(arr_or_dtype)
     except (TypeError, ValueError):
         return False
 
@@ -1395,13 +1407,13 @@ def _is_dtype(arr_or_dtype, condition) -> bool:
     if arr_or_dtype is None:
         return False
     try:
-        dtype = get_dtype(arr_or_dtype)
+        dtype = _get_dtype(arr_or_dtype)
     except (TypeError, ValueError):
         return False
     return condition(dtype)
 
 
-def get_dtype(arr_or_dtype) -> DtypeObj:
+def _get_dtype(arr_or_dtype) -> DtypeObj:
     """
     Get the dtype instance associated with an array
     or dtype object.
@@ -1532,7 +1544,7 @@ def infer_dtype_from_object(dtype) -> type:
         try:
             return infer_dtype_from_object(getattr(np, dtype))
         except (AttributeError, TypeError):
-            # Handles cases like get_dtype(int) i.e.,
+            # Handles cases like _get_dtype(int) i.e.,
             # Python objects that are valid dtypes
             # (unlike user-defined types, in general)
             #
@@ -1676,7 +1688,6 @@ def is_all_strings(value: ArrayLike) -> bool:
     "ensure_float64",
     "ensure_python_int",
     "ensure_str",
-    "get_dtype",
     "infer_dtype_from_object",
     "INT64_DTYPE",
     "is_1d_only_ea_dtype",

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -177,6 +177,7 @@ def test_get_dtype_error_catch(func):
         or func is com.is_interval_dtype
         or func is com.is_datetime64tz_dtype
         or func is com.is_categorical_dtype
+        or func is com.is_period_dtype
     ):
         warn = FutureWarning
 
@@ -197,14 +198,16 @@ def test_is_object():
     "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
 )
 def test_is_sparse(check_scipy):
-    assert com.is_sparse(SparseArray([1, 2, 3]))
+    msg = "is_sparse is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert com.is_sparse(SparseArray([1, 2, 3]))
 
-    assert not com.is_sparse(np.array([1, 2, 3]))
+        assert not com.is_sparse(np.array([1, 2, 3]))
 
-    if check_scipy:
-        import scipy.sparse
+        if check_scipy:
+            import scipy.sparse
 
-        assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
+            assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
 
 
 @td.skip_if_no_scipy
@@ -264,12 +267,14 @@ def test_is_timedelta64_dtype():
 
 
 def test_is_period_dtype():
-    assert not com.is_period_dtype(object)
-    assert not com.is_period_dtype([1, 2, 3])
-    assert not com.is_period_dtype(pd.Period("2017-01-01"))
+    msg = "is_period_dtype is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert not com.is_period_dtype(object)
+        assert not com.is_period_dtype([1, 2, 3])
+        assert not com.is_period_dtype(pd.Period("2017-01-01"))
 
-    assert com.is_period_dtype(PeriodDtype(freq="D"))
-    assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))
+        assert com.is_period_dtype(PeriodDtype(freq="D"))
+        assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))
 
 
 def test_is_interval_dtype():
@@ -695,7 +700,7 @@ def test_is_complex_dtype():
     ],
 )
 def test_get_dtype(input_param, result):
-    assert com.get_dtype(input_param) == result
+    assert com._get_dtype(input_param) == result
 
 
 @pytest.mark.parametrize(
@@ -714,7 +719,7 @@ def test_get_dtype_fails(input_param, expected_error_message):
     # 2020-02-02 npdev changed error message
     expected_error_message += f"|Cannot interpret '{input_param}' as a data type"
     with pytest.raises(TypeError, match=expected_error_message):
-        com.get_dtype(input_param)
+        com._get_dtype(input_param)
 
 
 @pytest.mark.parametrize(

diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -426,12 +426,10 @@ def test_construction(self):
         for s in ["period[D]", "Period[D]", "D"]:
             dt = PeriodDtype(s)
             assert dt.freq == pd.tseries.offsets.Day()
-            assert is_period_dtype(dt)
 
         for s in ["period[3D]", "Period[3D]", "3D"]:
             dt = PeriodDtype(s)
             assert dt.freq == pd.tseries.offsets.Day(3)
-            assert is_period_dtype(dt)
 
         for s in [
             "period[26H]",
@@ -443,7 +441,6 @@ def test_construction(self):
         ]:
             dt = PeriodDtype(s)
             assert dt.freq == pd.tseries.offsets.Hour(26)
-            assert is_period_dtype(dt)
 
     def test_cannot_use_custom_businessday(self):
         # GH#52534
@@ -529,20 +526,22 @@ def test_equality(self, dtype):
         assert not is_dtype_equal(PeriodDtype("D"), PeriodDtype("2D"))
 
     def test_basic(self, dtype):
-        assert is_period_dtype(dtype)
+        msg = "is_period_dtype is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert is_period_dtype(dtype)
 
-        pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")
+            pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")
 
-        assert is_period_dtype(pidx.dtype)
-        assert is_period_dtype(pidx)
+            assert is_period_dtype(pidx.dtype)
+            assert is_period_dtype(pidx)
 
-        s = Series(pidx, name="A")
+            s = Series(pidx, name="A")
 
-        assert is_period_dtype(s.dtype)
-        assert is_period_dtype(s)
+            assert is_period_dtype(s.dtype)
+            assert is_period_dtype(s)
 
-        assert not is_period_dtype(np.dtype("float64"))
-        assert not is_period_dtype(1.0)
+            assert not is_period_dtype(np.dtype("float64"))
+            assert not is_period_dtype(1.0)
 
     def test_freq_argument_required(self):
         # GH#27388
@@ -1131,6 +1130,7 @@ def test_is_dtype_no_warning(check):
         check is is_categorical_dtype
         or check is is_interval_dtype
         or check is is_datetime64tz_dtype
+        or check is is_period_dtype
     ):
         warn = FutureWarning
 

diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
@@ -3,7 +3,6 @@
 
 import pandas as pd
 import pandas._testing as tm
-from pandas.api.types import is_sparse
 from pandas.tests.extension.base.base import BaseExtensionTests
 
 
@@ -28,7 +27,7 @@ def test_isna_returns_copy(self, data_missing, na_func):
         result = pd.Series(data_missing)
         expected = result.copy()
         mask = getattr(result, na_func)()
-        if is_sparse(mask):
+        if isinstance(mask.dtype, pd.SparseDtype):
             mask = np.array(mask)
 
         mask[:] = True