Skip to content

Commit e38d786

Browse files
authored
DEPR: is_period_dtype, is_sparse (#52642)
1 parent 681af4c commit e38d786

File tree

11 files changed

+74
-42
lines changed

11 files changed

+74
-42
lines changed

doc/source/user_guide/io.rst

+4
Original file line number | Diff line number | Diff line change
@@ -5239,6 +5239,7 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
52395239
Write to a feather file.
52405240

52415241
.. ipython:: python
5242+
:okwarning:
52425243
52435244
df.to_feather("example.feather")
52445245
@@ -5382,6 +5383,7 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or
53825383
more columns in the output file. Thus, this code:
53835384

53845385
.. ipython:: python
5386+
:okwarning:
53855387
53865388
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
53875389
df.to_parquet("test.parquet", engine="pyarrow")
@@ -5398,6 +5400,7 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
53985400
:func:`~pandas.DataFrame.to_parquet`:
53995401

54005402
.. ipython:: python
5403+
:okwarning:
54015404
54025405
df.to_parquet("test.parquet", index=False)
54035406
@@ -5420,6 +5423,7 @@ Partitioning Parquet files
54205423
Parquet supports partitioning of data based on the values of one or more columns.
54215424

54225425
.. ipython:: python
5426+
:okwarning:
54235427
54245428
df = pd.DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]})
54255429
df.to_parquet(path="test", engine="pyarrow", partition_cols=["a"], compression=None)

doc/source/user_guide/scale.rst

+3
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ Suppose our raw dataset on disk has many columns::
4242
That can be generated by the following code snippet:
4343

4444
.. ipython:: python
45+
:okwarning:
4546
4647
import pandas as pd
4748
import numpy as np
@@ -106,6 +107,7 @@ referred to as "low-cardinality" data). By using more efficient data types, you
106107
can store larger datasets in memory.
107108

108109
.. ipython:: python
110+
:okwarning:
109111
110112
ts = make_timeseries(freq="30S", seed=0)
111113
ts.to_parquet("timeseries.parquet")
@@ -183,6 +185,7 @@ Suppose we have an even larger "logical dataset" on disk that's a directory of p
183185
files. Each file in the directory represents a different year of the entire dataset.
184186

185187
.. ipython:: python
188+
:okwarning:
186189
187190
import pathlib
188191

doc/source/whatsnew/v0.19.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -905,6 +905,7 @@ As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype:
905905
**New behavior**:
906906

907907
.. ipython:: python
908+
:okwarning:
908909
909910
pi = pd.PeriodIndex(["2016-08-01"], freq="D")
910911
pi

doc/source/whatsnew/v2.1.0.rst

+2
Original file line number | Diff line number | Diff line change
@@ -234,6 +234,8 @@ Deprecations
234234
- Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`)
235235
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
236236
- Deprecated :func:`is_interval_dtype`, check ``isinstance(dtype, pd.IntervalDtype)`` instead (:issue:`52607`)
237+
- Deprecated :func:`is_period_dtype`, check ``isinstance(dtype, pd.PeriodDtype)`` instead (:issue:`52642`)
238+
- Deprecated :func:`is_sparse`, check ``isinstance(dtype, pd.SparseDtype)`` instead (:issue:`52642`)
237239
- Deprecated :meth:`DataFrame.applymap`. Use the new :meth:`DataFrame.map` method instead (:issue:`52353`)
238240
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
239241
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)

pandas/conftest.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,10 @@ def pytest_collection_modifyitems(items, config) -> None:
137137
ignored_doctest_warnings = [
138138
("is_int64_dtype", "is_int64_dtype is deprecated"),
139139
("is_interval_dtype", "is_interval_dtype is deprecated"),
140+
("is_period_dtype", "is_period_dtype is deprecated"),
140141
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
142+
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
143+
("is_sparse", "is_sparse is deprecated"),
141144
# Docstring divides by zero to show behavior difference
142145
("missing.mask_zero_div_zero", "divide by zero encountered"),
143146
(
@@ -149,7 +152,6 @@ def pytest_collection_modifyitems(items, config) -> None:
149152
"(Series|DataFrame).bool is now deprecated and will be removed "
150153
"in future version of pandas",
151154
),
152-
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
153155
]
154156

155157
for item in items:

pandas/core/arrays/datetimes.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,6 @@
5252
is_bool_dtype,
5353
is_dtype_equal,
5454
is_float_dtype,
55-
is_sparse,
5655
is_string_dtype,
5756
pandas_dtype,
5857
)
@@ -65,6 +64,7 @@
6564

6665
from pandas.core.arrays import datetimelike as dtl
6766
from pandas.core.arrays._ranges import generate_regular_range
67+
from pandas.core.arrays.sparse.dtype import SparseDtype
6868
import pandas.core.common as com
6969

7070
from pandas.tseries.frequencies import get_period_alias
@@ -2038,7 +2038,11 @@ def _sequence_to_dt64ns(
20382038
if out_unit is not None:
20392039
out_dtype = np.dtype(f"M8[{out_unit}]")
20402040

2041-
if data_dtype == object or is_string_dtype(data_dtype) or is_sparse(data_dtype):
2041+
if (
2042+
data_dtype == object
2043+
or is_string_dtype(data_dtype)
2044+
or isinstance(data_dtype, SparseDtype)
2045+
):
20422046
# TODO: We do not have tests specific to string-dtypes,
20432047
# also complex or categorical or other extension
20442048
copy = False

pandas/core/arrays/period.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,6 @@
5656
from pandas.core.dtypes.common import (
5757
ensure_object,
5858
is_dtype_equal,
59-
is_period_dtype,
6059
pandas_dtype,
6160
)
6261
from pandas.core.dtypes.dtypes import (
@@ -172,7 +171,9 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc]
172171
_typ = "periodarray" # ABCPeriodArray
173172
_internal_fill_value = np.int64(iNaT)
174173
_recognized_scalars = (Period,)
175-
_is_recognized_dtype = is_period_dtype # check_compatible_with checks freq match
174+
_is_recognized_dtype = lambda x: isinstance(
175+
x, PeriodDtype
176+
) # check_compatible_with checks freq match
176177
_infer_matches = ("period",)
177178

178179
@property

pandas/core/dtypes/common.py

+22-11
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,12 @@ def is_sparse(arr) -> bool:
207207
208208
Returns `False` if the parameter has more than one dimension.
209209
"""
210+
warnings.warn(
211+
"is_sparse is deprecated and will be removed in a future "
212+
"version. Check `isinstance(dtype, pd.SparseDtype)` instead.",
213+
FutureWarning,
214+
stacklevel=find_stack_level(),
215+
)
210216
from pandas.core.arrays.sparse import SparseDtype
211217

212218
dtype = getattr(arr, "dtype", arr)
@@ -399,6 +405,12 @@ def is_period_dtype(arr_or_dtype) -> bool:
399405
>>> is_period_dtype(pd.PeriodIndex([], freq="A"))
400406
True
401407
"""
408+
warnings.warn(
409+
"is_period_dtype is deprecated and will be removed in a future version. "
410+
"Use `isinstance(dtype, pd.PeriodDtype)` instead",
411+
FutureWarning,
412+
stacklevel=find_stack_level(),
413+
)
402414
if isinstance(arr_or_dtype, ExtensionDtype):
403415
# GH#33400 fastpath for dtype object
404416
return arr_or_dtype.type is Period
@@ -539,7 +551,7 @@ def is_string_dtype(arr_or_dtype) -> bool:
539551
>>> is_string_dtype(pd.Series([1, 2], dtype=object))
540552
False
541553
"""
542-
if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O":
554+
if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
543555
return is_all_strings(arr_or_dtype)
544556

545557
def condition(dtype) -> bool:
@@ -585,7 +597,7 @@ def is_dtype_equal(source, target) -> bool:
585597
# GH#38516 ensure we get the same behavior from
586598
# is_dtype_equal(CDT, "category") and CDT == "category"
587599
try:
588-
src = get_dtype(source)
600+
src = _get_dtype(source)
589601
if isinstance(src, ExtensionDtype):
590602
return src == target
591603
except (TypeError, AttributeError, ImportError):
@@ -594,8 +606,8 @@ def is_dtype_equal(source, target) -> bool:
594606
return is_dtype_equal(target, source)
595607

596608
try:
597-
source = get_dtype(source)
598-
target = get_dtype(target)
609+
source = _get_dtype(source)
610+
target = _get_dtype(target)
599611
return source == target
600612
except (TypeError, AttributeError, ImportError):
601613
# invalid comparison
@@ -875,7 +887,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool:
875887
return False
876888

877889
try:
878-
tipo = get_dtype(arr_or_dtype)
890+
tipo = _get_dtype(arr_or_dtype)
879891
except TypeError:
880892
return False
881893
return (isinstance(tipo, np.dtype) and tipo.kind == "M") or isinstance(
@@ -923,7 +935,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
923935
if arr_or_dtype is None:
924936
return False
925937
try:
926-
tipo = get_dtype(arr_or_dtype)
938+
tipo = _get_dtype(arr_or_dtype)
927939
except TypeError:
928940
return False
929941
return tipo == DT64NS_DTYPE or (
@@ -1214,7 +1226,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
12141226
if arr_or_dtype is None:
12151227
return False
12161228
try:
1217-
dtype = get_dtype(arr_or_dtype)
1229+
dtype = _get_dtype(arr_or_dtype)
12181230
except (TypeError, ValueError):
12191231
return False
12201232

@@ -1373,13 +1385,13 @@ def _is_dtype(arr_or_dtype, condition) -> bool:
13731385
if arr_or_dtype is None:
13741386
return False
13751387
try:
1376-
dtype = get_dtype(arr_or_dtype)
1388+
dtype = _get_dtype(arr_or_dtype)
13771389
except (TypeError, ValueError):
13781390
return False
13791391
return condition(dtype)
13801392

13811393

1382-
def get_dtype(arr_or_dtype) -> DtypeObj:
1394+
def _get_dtype(arr_or_dtype) -> DtypeObj:
13831395
"""
13841396
Get the dtype instance associated with an array
13851397
or dtype object.
@@ -1510,7 +1522,7 @@ def infer_dtype_from_object(dtype) -> type:
15101522
try:
15111523
return infer_dtype_from_object(getattr(np, dtype))
15121524
except (AttributeError, TypeError):
1513-
# Handles cases like get_dtype(int) i.e.,
1525+
# Handles cases like _get_dtype(int) i.e.,
15141526
# Python objects that are valid dtypes
15151527
# (unlike user-defined types, in general)
15161528
#
@@ -1653,7 +1665,6 @@ def is_all_strings(value: ArrayLike) -> bool:
16531665
"ensure_float64",
16541666
"ensure_python_int",
16551667
"ensure_str",
1656-
"get_dtype",
16571668
"infer_dtype_from_object",
16581669
"INT64_DTYPE",
16591670
"is_1d_only_ea_dtype",

pandas/tests/dtypes/test_common.py

+17-12
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,7 @@ def test_get_dtype_error_catch(func):
177177
or func is com.is_interval_dtype
178178
or func is com.is_datetime64tz_dtype
179179
or func is com.is_categorical_dtype
180+
or func is com.is_period_dtype
180181
):
181182
warn = FutureWarning
182183

@@ -197,14 +198,16 @@ def test_is_object():
197198
"check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
198199
)
199200
def test_is_sparse(check_scipy):
200-
assert com.is_sparse(SparseArray([1, 2, 3]))
201+
msg = "is_sparse is deprecated"
202+
with tm.assert_produces_warning(FutureWarning, match=msg):
203+
assert com.is_sparse(SparseArray([1, 2, 3]))
201204

202-
assert not com.is_sparse(np.array([1, 2, 3]))
205+
assert not com.is_sparse(np.array([1, 2, 3]))
203206

204-
if check_scipy:
205-
import scipy.sparse
207+
if check_scipy:
208+
import scipy.sparse
206209

207-
assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
210+
assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
208211

209212

210213
@td.skip_if_no_scipy
@@ -264,12 +267,14 @@ def test_is_timedelta64_dtype():
264267

265268

266269
def test_is_period_dtype():
267-
assert not com.is_period_dtype(object)
268-
assert not com.is_period_dtype([1, 2, 3])
269-
assert not com.is_period_dtype(pd.Period("2017-01-01"))
270+
msg = "is_period_dtype is deprecated"
271+
with tm.assert_produces_warning(FutureWarning, match=msg):
272+
assert not com.is_period_dtype(object)
273+
assert not com.is_period_dtype([1, 2, 3])
274+
assert not com.is_period_dtype(pd.Period("2017-01-01"))
270275

271-
assert com.is_period_dtype(PeriodDtype(freq="D"))
272-
assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))
276+
assert com.is_period_dtype(PeriodDtype(freq="D"))
277+
assert com.is_period_dtype(pd.PeriodIndex([], freq="A"))
273278

274279

275280
def test_is_interval_dtype():
@@ -681,7 +686,7 @@ def test_is_complex_dtype():
681686
],
682687
)
683688
def test_get_dtype(input_param, result):
684-
assert com.get_dtype(input_param) == result
689+
assert com._get_dtype(input_param) == result
685690

686691

687692
@pytest.mark.parametrize(
@@ -700,7 +705,7 @@ def test_get_dtype_fails(input_param, expected_error_message):
700705
# 2020-02-02 npdev changed error message
701706
expected_error_message += f"|Cannot interpret '{input_param}' as a data type"
702707
with pytest.raises(TypeError, match=expected_error_message):
703-
com.get_dtype(input_param)
708+
com._get_dtype(input_param)
704709

705710

706711
@pytest.mark.parametrize(

pandas/tests/dtypes/test_dtypes.py

+12-12
Original file line number | Diff line number | Diff line change
@@ -427,12 +427,10 @@ def test_construction(self):
427427
for s in ["period[D]", "Period[D]", "D"]:
428428
dt = PeriodDtype(s)
429429
assert dt.freq == pd.tseries.offsets.Day()
430-
assert is_period_dtype(dt)
431430

432431
for s in ["period[3D]", "Period[3D]", "3D"]:
433432
dt = PeriodDtype(s)
434433
assert dt.freq == pd.tseries.offsets.Day(3)
435-
assert is_period_dtype(dt)
436434

437435
for s in [
438436
"period[26H]",
@@ -444,7 +442,6 @@ def test_construction(self):
444442
]:
445443
dt = PeriodDtype(s)
446444
assert dt.freq == pd.tseries.offsets.Hour(26)
447-
assert is_period_dtype(dt)
448445

449446
def test_cannot_use_custom_businessday(self):
450447
# GH#52534
@@ -530,20 +527,22 @@ def test_equality(self, dtype):
530527
assert not is_dtype_equal(PeriodDtype("D"), PeriodDtype("2D"))
531528

532529
def test_basic(self, dtype):
533-
assert is_period_dtype(dtype)
530+
msg = "is_period_dtype is deprecated"
531+
with tm.assert_produces_warning(FutureWarning, match=msg):
532+
assert is_period_dtype(dtype)
534533

535-
pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")
534+
pidx = pd.period_range("2013-01-01 09:00", periods=5, freq="H")
536535

537-
assert is_period_dtype(pidx.dtype)
538-
assert is_period_dtype(pidx)
536+
assert is_period_dtype(pidx.dtype)
537+
assert is_period_dtype(pidx)
539538

540-
s = Series(pidx, name="A")
539+
s = Series(pidx, name="A")
541540

542-
assert is_period_dtype(s.dtype)
543-
assert is_period_dtype(s)
541+
assert is_period_dtype(s.dtype)
542+
assert is_period_dtype(s)
544543

545-
assert not is_period_dtype(np.dtype("float64"))
546-
assert not is_period_dtype(1.0)
544+
assert not is_period_dtype(np.dtype("float64"))
545+
assert not is_period_dtype(1.0)
547546

548547
def test_freq_argument_required(self):
549548
# GH#27388
@@ -1132,6 +1131,7 @@ def test_is_dtype_no_warning(check):
11321131
check is is_categorical_dtype
11331132
or check is is_interval_dtype
11341133
or check is is_datetime64tz_dtype
1134+
or check is is_period_dtype
11351135
):
11361136
warn = FutureWarning
11371137

0 commit comments

Comments
 (0)