diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index c34d5f3c467a2..1405658cb9ba5 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5226,6 +5226,7 @@ See the `Full Documentation `__. Write to a feather file. .. ipython:: python + :okwarning: df.to_feather("example.feather") @@ -5355,6 +5356,7 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or more columns in the output file. Thus, this code: .. ipython:: python + :okwarning: df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) df.to_parquet("test.parquet", engine="pyarrow") @@ -5371,6 +5373,7 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to :func:`~pandas.DataFrame.to_parquet`: .. ipython:: python + :okwarning: df.to_parquet("test.parquet", index=False) @@ -5393,6 +5396,7 @@ Partitioning Parquet files Parquet supports partitioning of data based on the values of one or more columns. .. ipython:: python + :okwarning: df = pd.DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]}) df.to_parquet(path="test", engine="pyarrow", partition_cols=["a"], compression=None) diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 182f1ca39e0a8..261852692ab2f 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -42,6 +42,7 @@ Suppose our raw dataset on disk has many columns:: That can be generated by the following code snippet: .. ipython:: python + :okwarning: import pandas as pd import numpy as np @@ -106,6 +107,7 @@ referred to as "low-cardinality" data). By using more efficient data types, you can store larger datasets in memory. .. ipython:: python + :okwarning: ts = make_timeseries(freq="30S", seed=0) ts.to_parquet("timeseries.parquet") @@ -183,6 +185,7 @@ Suppose we have an even larger "logical dataset" on disk that's a directory of p files. Each file in the directory represents a different year of the entire dataset. .. ipython:: python + :okwarning: import pathlib diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index ab17cacd830e5..d4b879f137698 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -905,6 +905,7 @@ As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype: **New behavior**: .. ipython:: python + :okwarning: pi = pd.PeriodIndex(["2016-08-01"], freq="D") pi diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 22a2931519ffd..e11ab88c3cf62 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -231,6 +231,8 @@ Deprecations - Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`) - Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`) - Deprecated :func:`is_interval_dtype`, check ``isinstance(dtype, pd.IntervalDtype)`` instead (:issue:`52607`) +- Deprecated :func:`is_period_dtype`, check ``isinstance(dtype, pd.PeriodDtype)`` instead (:issue:`52642`) +- Deprecated :func:`is_sparse`, check ``isinstance(dtype, pd.SparseDtype)`` instead (:issue:`52642`) - Deprecated :meth:`DataFrame.applymap`. Use the new :meth:`DataFrame.map` method instead (:issue:`52353`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) - Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`) diff --git a/pandas/conftest.py b/pandas/conftest.py index 7773d8de37705..77d2f4802c08f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -137,7 +137,10 @@ def pytest_collection_modifyitems(items, config) -> None: ignored_doctest_warnings = [ ("is_int64_dtype", "is_int64_dtype is deprecated"), ("is_interval_dtype", "is_interval_dtype is deprecated"), + ("is_period_dtype", "is_period_dtype is deprecated"), ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"), + ("is_categorical_dtype", "is_categorical_dtype is deprecated"), + ("is_sparse", "is_sparse is deprecated"), # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( @@ -149,7 +152,6 @@ def pytest_collection_modifyitems(items, config) -> None: "(Series|DataFrame).bool is now deprecated and will be removed " "in future version of pandas", ), - ("is_categorical_dtype", "is_categorical_dtype is deprecated"), ] for item in items: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a6ef01c3a956f..992c75a6300a1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -53,7 +53,6 @@ is_datetime64_any_dtype, is_dtype_equal, is_float_dtype, - is_sparse, is_string_dtype, pandas_dtype, ) @@ -66,6 +65,7 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays._ranges import generate_regular_range +from pandas.core.arrays.sparse.dtype import SparseDtype import pandas.core.common as com from pandas.tseries.frequencies import get_period_alias @@ -2037,7 +2037,11 @@ def _sequence_to_dt64ns( if out_unit is not None: out_dtype = np.dtype(f"M8[{out_unit}]") - if data_dtype == object or is_string_dtype(data_dtype) or is_sparse(data_dtype): + if ( + data_dtype == object + or is_string_dtype(data_dtype) + or isinstance(data_dtype, SparseDtype) + ): # TODO: We do not have tests specific to string-dtypes, # also complex or categorical or other extension copy = False diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 6710f092277fd..5421d3b35e8c4 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -57,7 +57,6 @@ ensure_object, is_datetime64_any_dtype, is_dtype_equal, - is_period_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import PeriodDtype @@ -170,7 +169,9 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] _typ = "periodarray" # ABCPeriodArray _internal_fill_value = np.int64(iNaT) _recognized_scalars = (Period,) - _is_recognized_dtype = is_period_dtype # check_compatible_with checks freq match + _is_recognized_dtype = lambda x: isinstance( + x, PeriodDtype + ) # check_compatible_with checks freq match _infer_matches = ("period",) @property diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 67fb5a81ecabe..f686a8f2960e5 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -207,6 +207,12 @@ def is_sparse(arr) -> bool: Returns `False` if the parameter has more than one dimension. """ + warnings.warn( + "is_sparse is deprecated and will be removed in a future " + "version. Check `isinstance(dtype, pd.SparseDtype)` instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) from pandas.core.arrays.sparse import SparseDtype dtype = getattr(arr, "dtype", arr) @@ -399,6 +405,12 @@ def is_period_dtype(arr_or_dtype) -> bool: >>> is_period_dtype(pd.PeriodIndex([], freq="A")) True """ + warnings.warn( + "is_period_dtype is deprecated and will be removed in a future version. " + "Use `isinstance(dtype, pd.PeriodDtype)` instead", + FutureWarning, + stacklevel=find_stack_level(), + ) if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object return arr_or_dtype.type is Period @@ -539,7 +551,7 @@ def is_string_dtype(arr_or_dtype) -> bool: >>> is_string_dtype(pd.Series([1, 2], dtype=object)) False """ - if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O": + if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O": return is_all_strings(arr_or_dtype) def condition(dtype) -> bool: @@ -585,7 +597,7 @@ def is_dtype_equal(source, target) -> bool: # GH#38516 ensure we get the same behavior from # is_dtype_equal(CDT, "category") and CDT == "category" try: - src = get_dtype(source) + src = _get_dtype(source) if isinstance(src, ExtensionDtype): return src == target except (TypeError, AttributeError, ImportError): @@ -594,8 +606,8 @@ def is_dtype_equal(source, target) -> bool: return is_dtype_equal(target, source) try: - source = get_dtype(source) - target = get_dtype(target) + source = _get_dtype(source) + target = _get_dtype(target) return source == target except (TypeError, AttributeError, ImportError): # invalid comparison @@ -870,7 +882,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool: return False try: - tipo = get_dtype(arr_or_dtype) + tipo = _get_dtype(arr_or_dtype) except TypeError: return False return (isinstance(tipo, np.dtype) and tipo.kind == "M") or isinstance( @@ -918,7 +930,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: if arr_or_dtype is None: return False try: - tipo = get_dtype(arr_or_dtype) + tipo = _get_dtype(arr_or_dtype) except TypeError: return False return tipo == DT64NS_DTYPE or ( @@ -1209,7 +1221,7 @@ def is_bool_dtype(arr_or_dtype) -> bool: if arr_or_dtype is None: return False try: - dtype = get_dtype(arr_or_dtype) + dtype = _get_dtype(arr_or_dtype) except (TypeError, ValueError): return False @@ -1368,13 +1380,13 @@ def _is_dtype(arr_or_dtype, condition) -> bool: if arr_or_dtype is None: return False try: - dtype = get_dtype(arr_or_dtype) + dtype = _get_dtype(arr_or_dtype) except (TypeError, ValueError): return False return condition(dtype) -def get_dtype(arr_or_dtype) -> DtypeObj: +def _get_dtype(arr_or_dtype) -> DtypeObj: """ Get the dtype instance associated with an array or dtype object. @@ -1505,7 +1517,7 @@ def infer_dtype_from_object(dtype) -> type: try: return infer_dtype_from_object(getattr(np, dtype)) except (AttributeError, TypeError): - # Handles cases like get_dtype(int) i.e., + # Handles cases like _get_dtype(int) i.e., # Python objects that are valid dtypes # (unlike user-defined types, in general) # @@ -1648,7 +1660,6 @@ def is_all_strings(value: ArrayLike) -> bool: "ensure_float64", "ensure_python_int", "ensure_str", - "get_dtype", "infer_dtype_from_object", "INT64_DTYPE", "is_1d_only_ea_dtype", diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a28a5b62f4ad0..85fbac186b369 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -177,6 +177,7 @@ def test_get_dtype_error_catch(func): or func is com.is_interval_dtype or func is com.is_datetime64tz_dtype or func is com.is_categorical_dtype + or func is com.is_period_dtype ): warn = FutureWarning @@ -197,14 +198,16 @@ def test_is_object(): "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] ) def test_is_sparse(check_scipy): - assert com.is_sparse(SparseArray([1, 2, 3])) + msg = "is_sparse is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert com.is_sparse(SparseArray([1, 2, 3])) - assert not com.is_sparse(np.array([1, 2, 3])) + assert not com.is_sparse(np.array([1, 2, 3])) - if check_scipy: - import scipy.sparse + if check_scipy: + import scipy.sparse - assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3])) + assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3])) @td.skip_if_no_scipy @@ -264,12 +267,14 @@ def test_is_timedelta64_dtype(): def test_is_period_dtype(): - assert not com.is_period_dtype(object) - assert not com.is_period_dtype([1, 2, 3]) - assert not com.is_period_dtype(pd.Period("2017-01-01")) + msg = "is_period_dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert not com.is_period_dtype(object) + assert not com.is_period_dtype([1, 2, 3]) + assert not com.is_period_dtype(pd.Period("2017-01-01")) - assert com.is_period_dtype(PeriodDtype(freq="D")) - assert com.is_period_dtype(pd.PeriodIndex([], freq="A")) + assert com.is_period_dtype(PeriodDtype(freq="D")) + assert com.is_period_dtype(pd.PeriodIndex([], freq="A")) def test_is_interval_dtype(): @@ -681,7 +686,7 @@ def test_is_complex_dtype(): ], ) def test_get_dtype(input_param, result): - assert com.get_dtype(input_param) == result + assert com._get_dtype(input_param) == result @pytest.mark.parametrize( @@ -700,7 +705,7 @@ def test_get_dtype_fails(input_param, expected_error_message): # 2020-02-02 npdev changed error message expected_error_message += f"|Cannot interpret '{input_param}' as a data type" with pytest.raises(TypeError, match=expected_error_message): - com.get_dtype(input_param) + com._get_dtype(input_param) @pytest.mark.parametrize( diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 077cbbc72e048..5b0fcd5383059 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -427,12 +427,10 @@ def test_construction(self): for s in ["period[D]", "Period[D]", "D"]: dt = PeriodDtype(s) assert dt.freq == pd.tseries.offsets.Day() - assert is_period_dtype(dt) for s in ["period[3D]", "Period[3D]", "3D"]: dt = PeriodDtype(s) assert dt.freq == pd.tseries.offsets.Day(3) - assert is_period_dtype(dt) for s in [ "period[26H]", @@ -444,7 +442,6 @@ def test_construction(self): ]: dt = PeriodDtype(s) assert dt.freq == pd.tseries.offsets.Hour(26) - assert is_period_dtype(dt) def test_cannot_use_custom_businessday(self): # GH#52534 @@ -530,20 +527,22 @@ def test_equality(self, dtype): assert not is_dtype_equal(PeriodDtype("D"), PeriodDtype("2D")) def test_basic(self, dtype): - assert is_period_dtype(dtype) + msg = "is_period_dtype is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert is_period_dtype(dtype) - pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H") + pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H") - assert is_period_dtype(pidx.dtype) - assert is_period_dtype(pidx) + assert is_period_dtype(pidx.dtype) + assert is_period_dtype(pidx) - s = Series(pidx, name="A") + s = Series(pidx, name="A") - assert is_period_dtype(s.dtype) - assert is_period_dtype(s) + assert is_period_dtype(s.dtype) + assert is_period_dtype(s) - assert not is_period_dtype(np.dtype("float64")) - assert not is_period_dtype(1.0) + assert not is_period_dtype(np.dtype("float64")) + assert not is_period_dtype(1.0) def test_freq_argument_required(self): # GH#27388 @@ -1132,6 +1131,7 @@ def test_is_dtype_no_warning(check): check is is_categorical_dtype or check is is_interval_dtype or check is is_datetime64tz_dtype + or check is is_period_dtype ): warn = FutureWarning diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index cab81f864d8d8..8a53c06e0b7bf 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -3,7 +3,6 @@ import pandas as pd import pandas._testing as tm -from pandas.api.types import is_sparse from pandas.tests.extension.base.base import BaseExtensionTests @@ -28,7 +27,7 @@ def test_isna_returns_copy(self, data_missing, na_func): result = pd.Series(data_missing) expected = result.copy() mask = getattr(result, na_func)() - if is_sparse(mask): + if isinstance(mask.dtype, pd.SparseDtype): mask = np.array(mask) mask[:] = True