Skip to content

Commit 658ac5b

Browse files
authored
DEPR: is_categorical_dtype (#52527)
* DEPR: is_categorical_dtype * GH ref * suppress warning in doctest * okwarning from dask * update test
1 parent e616938 commit 658ac5b

File tree

22 files changed

+100
-78
lines changed

22 files changed

+100
-78
lines changed

doc/source/user_guide/scale.rst

+4
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
257257
We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.
258258

259259
.. ipython:: python
260+
:okwarning:
260261
261262
import dask.dataframe as dd
262263
@@ -286,6 +287,7 @@ column names and dtypes. That's because Dask hasn't actually read the data yet.
286287
Rather than executing immediately, operations build up a **task graph**.
287288

288289
.. ipython:: python
290+
:okwarning:
289291
290292
ddf
291293
ddf["name"]
@@ -300,6 +302,7 @@ returns a Dask Series with the same dtype and the same name.
300302
To get the actual result you can call ``.compute()``.
301303

302304
.. ipython:: python
305+
:okwarning:
303306
304307
%time ddf["name"].value_counts().compute()
305308
@@ -345,6 +348,7 @@ known automatically. In this case, since we created the parquet files manually,
345348
we need to supply the divisions manually.
346349

347350
.. ipython:: python
351+
:okwarning:
348352
349353
N = 12
350354
starts = [f"20{i:>02d}-01-01" for i in range(N)]

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ Deprecations
226226
- Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. This pattern was very slow and it's recommended to use alternative methods to achieve the same goal (:issue:`52116`)
227227
- Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
228228
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
229+
- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`)
229230
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
230231
-
231232

pandas/conftest.py

+1
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ def pytest_collection_modifyitems(items, config) -> None:
147147
"(Series|DataFrame).bool is now deprecated and will be removed "
148148
"in future version of pandas",
149149
),
150+
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
150151
]
151152

152153
for item in items:

pandas/core/apply.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,14 @@
4141

4242
from pandas.core.dtypes.cast import is_nested_object
4343
from pandas.core.dtypes.common import (
44-
is_categorical_dtype,
4544
is_dict_like,
4645
is_list_like,
4746
is_sequence,
4847
)
49-
from pandas.core.dtypes.dtypes import ExtensionDtype
48+
from pandas.core.dtypes.dtypes import (
49+
CategoricalDtype,
50+
ExtensionDtype,
51+
)
5052
from pandas.core.dtypes.generic import (
5153
ABCDataFrame,
5254
ABCNDFrame,
@@ -1115,7 +1117,7 @@ def apply_standard(self) -> DataFrame | Series:
11151117
# we need to give `na_action="ignore"` for categorical data.
11161118
# TODO: remove the `na_action="ignore"` when that default has been changed in
11171119
# Categorical (GH51645).
1118-
action = "ignore" if is_categorical_dtype(obj) else None
1120+
action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
11191121
mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype)
11201122

11211123
if len(mapped) and isinstance(mapped[0], ABCSeries):

pandas/core/arrays/categorical.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
ensure_platform_int,
3939
is_any_real_numeric_dtype,
4040
is_bool_dtype,
41-
is_categorical_dtype,
4241
is_datetime64_dtype,
4342
is_dict_like,
4443
is_dtype_equal,
@@ -409,7 +408,8 @@ def __init__(
409408
null_mask = np.array(False)
410409

411410
# sanitize input
412-
if is_categorical_dtype(values):
411+
vdtype = getattr(values, "dtype", None)
412+
if isinstance(vdtype, CategoricalDtype):
413413
if dtype.categories is None:
414414
dtype = CategoricalDtype(values.categories, dtype.ordered)
415415
elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
@@ -2721,7 +2721,9 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]:
27212721
raise TypeError("Input must be list-like")
27222722

27232723
categories: Index
2724-
if is_categorical_dtype(values):
2724+
2725+
vdtype = getattr(values, "dtype", None)
2726+
if isinstance(vdtype, CategoricalDtype):
27252727
values = extract_array(values)
27262728
# The Categorical we want to build has the same categories
27272729
# as values but its codes are by def [0, ..., len(n_categories) - 1]

pandas/core/arrays/interval.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@
5050
maybe_upcast_numeric_to_64bit,
5151
)
5252
from pandas.core.dtypes.common import (
53-
is_categorical_dtype,
5453
is_dtype_equal,
5554
is_float_dtype,
5655
is_integer_dtype,
@@ -1772,7 +1771,7 @@ def _maybe_convert_platform_interval(values) -> ArrayLike:
17721771
elif not is_list_like(values) or isinstance(values, ABCDataFrame):
17731772
# This will raise later, but we avoid passing to maybe_convert_platform
17741773
return values
1775-
elif is_categorical_dtype(values):
1774+
elif isinstance(getattr(values, "dtype", None), CategoricalDtype):
17761775
values = np.asarray(values)
17771776
elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
17781777
# TODO: should we just cast these to list?

pandas/core/dtypes/common.py

+7
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,13 @@ def is_categorical_dtype(arr_or_dtype) -> bool:
469469
>>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
470470
True
471471
"""
472+
# GH#52527
473+
warnings.warn(
474+
"is_categorical_dtype is deprecated and will be removed in a future "
475+
"version. Use isinstance(dtype, CategoricalDtype) instead",
476+
FutureWarning,
477+
stacklevel=find_stack_level(),
478+
)
472479
if isinstance(arr_or_dtype, ExtensionDtype):
473480
# GH#33400 fastpath for dtype object
474481
return arr_or_dtype.name == "category"

pandas/core/groupby/grouper.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
from pandas.util._exceptions import find_stack_level
2323

2424
from pandas.core.dtypes.common import (
25-
is_categorical_dtype,
2625
is_list_like,
2726
is_scalar,
2827
)
28+
from pandas.core.dtypes.dtypes import CategoricalDtype
2929

3030
from pandas.core import algorithms
3131
from pandas.core.arrays import (
@@ -618,7 +618,7 @@ def __init__(
618618
# TODO 2022-10-08 we only have one test that gets here and
619619
# values are already in nanoseconds in that case.
620620
grouping_vector = Series(grouping_vector).to_numpy()
621-
elif is_categorical_dtype(grouping_vector):
621+
elif isinstance(getattr(grouping_vector, "dtype", None), CategoricalDtype):
622622
# a passed Categorical
623623
self._orig_cats = grouping_vector.categories
624624
grouping_vector, self._all_grouper = recode_for_groupby(
@@ -635,7 +635,8 @@ def __iter__(self) -> Iterator:
635635

636636
@cache_readonly
637637
def _passed_categorical(self) -> bool:
638-
return is_categorical_dtype(self.grouping_vector)
638+
dtype = getattr(self.grouping_vector, "dtype", None)
639+
return isinstance(dtype, CategoricalDtype)
639640

640641
@cache_readonly
641642
def name(self) -> Hashable:

pandas/core/interchange/column.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@
1010
from pandas.util._decorators import cache_readonly
1111

1212
import pandas as pd
13-
from pandas.api.types import (
14-
is_categorical_dtype,
15-
is_string_dtype,
16-
)
13+
from pandas.api.types import is_string_dtype
1714
from pandas.core.interchange.buffer import PandasBuffer
1815
from pandas.core.interchange.dataframe_protocol import (
1916
Column,
@@ -99,7 +96,7 @@ def offset(self) -> int:
9996
def dtype(self) -> tuple[DtypeKind, int, str, str]:
10097
dtype = self._col.dtype
10198

102-
if is_categorical_dtype(dtype):
99+
if isinstance(dtype, pd.CategoricalDtype):
103100
codes = self._col.values.codes
104101
(
105102
_,

pandas/core/reshape/tile.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
DT64NS_DTYPE,
2323
ensure_platform_int,
2424
is_bool_dtype,
25-
is_categorical_dtype,
2625
is_datetime64_dtype,
2726
is_datetime64tz_dtype,
2827
is_datetime_or_timedelta_dtype,
@@ -33,6 +32,7 @@
3332
is_timedelta64_dtype,
3433
)
3534
from pandas.core.dtypes.dtypes import (
35+
CategoricalDtype,
3636
DatetimeTZDtype,
3737
ExtensionDtype,
3838
)
@@ -458,7 +458,8 @@ def _bins_to_cuts(
458458
raise ValueError(
459459
"Bin labels must be one fewer than the number of bin edges"
460460
)
461-
if not is_categorical_dtype(labels):
461+
462+
if not isinstance(getattr(labels, "dtype", None), CategoricalDtype):
462463
labels = Categorical(
463464
labels,
464465
categories=labels if len(set(labels)) == len(labels) else None,

pandas/plotting/_matplotlib/core.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
from pandas.core.dtypes.common import (
2424
is_any_real_numeric_dtype,
25-
is_categorical_dtype,
2625
is_extension_array_dtype,
2726
is_float,
2827
is_float_dtype,
@@ -34,6 +33,7 @@
3433
is_number,
3534
is_numeric_dtype,
3635
)
36+
from pandas.core.dtypes.dtypes import CategoricalDtype
3737
from pandas.core.dtypes.generic import (
3838
ABCDataFrame,
3939
ABCIndex,
@@ -563,7 +563,7 @@ def result(self):
563563

564564
def _convert_to_ndarray(self, data):
565565
# GH31357: categorical columns are processed separately
566-
if is_categorical_dtype(data):
566+
if isinstance(data.dtype, CategoricalDtype):
567567
return data
568568

569569
# GH32073: cast to float if values contain nulled integers
@@ -1211,7 +1211,9 @@ def _make_plot(self):
12111211

12121212
c_is_column = is_hashable(c) and c in self.data.columns
12131213

1214-
color_by_categorical = c_is_column and is_categorical_dtype(self.data[c])
1214+
color_by_categorical = c_is_column and isinstance(
1215+
self.data[c].dtype, CategoricalDtype
1216+
)
12151217

12161218
color = self.kwds.pop("color", None)
12171219
if c is not None and color is not None:

pandas/tests/base/test_misc.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from pandas.compat import PYPY
77

88
from pandas.core.dtypes.common import (
9-
is_categorical_dtype,
109
is_dtype_equal,
1110
is_object_dtype,
1211
)
@@ -96,8 +95,8 @@ def test_memory_usage(index_or_series_memory_obj):
9695
res_deep = obj.memory_usage(deep=True)
9796

9897
is_object = is_object_dtype(obj) or (is_ser and is_object_dtype(obj.index))
99-
is_categorical = is_categorical_dtype(obj.dtype) or (
100-
is_ser and is_categorical_dtype(obj.index.dtype)
98+
is_categorical = isinstance(obj.dtype, pd.CategoricalDtype) or (
99+
is_ser and isinstance(obj.index.dtype, pd.CategoricalDtype)
101100
)
102101
is_object_string = is_dtype_equal(obj, "string[python]") or (
103102
is_ser and is_dtype_equal(obj.index.dtype, "string[python]")

pandas/tests/dtypes/test_common.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ def get_is_dtype_funcs():
163163
return [getattr(com, fname) for fname in fnames]
164164

165165

166+
@pytest.mark.filterwarnings("ignore:is_categorical_dtype is deprecated:FutureWarning")
166167
@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__)
167168
def test_get_dtype_error_catch(func):
168169
# see gh-15941
@@ -171,7 +172,7 @@ def test_get_dtype_error_catch(func):
171172

172173
msg = f"{func.__name__} is deprecated"
173174
warn = None
174-
if func is com.is_int64_dtype:
175+
if func is com.is_int64_dtype or func is com.is_categorical_dtype:
175176
warn = FutureWarning
176177

177178
with tm.assert_produces_warning(warn, match=msg):
@@ -274,12 +275,14 @@ def test_is_interval_dtype():
274275

275276

276277
def test_is_categorical_dtype():
277-
assert not com.is_categorical_dtype(object)
278-
assert not com.is_categorical_dtype([1, 2, 3])
278+
msg = "is_categorical_dtype is deprecated"
279+
with tm.assert_produces_warning(FutureWarning, match=msg):
280+
assert not com.is_categorical_dtype(object)
281+
assert not com.is_categorical_dtype([1, 2, 3])
279282

280-
assert com.is_categorical_dtype(CategoricalDtype())
281-
assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
282-
assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
283+
assert com.is_categorical_dtype(CategoricalDtype())
284+
assert com.is_categorical_dtype(pd.Categorical([1, 2, 3]))
285+
assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
283286

284287

285288
def test_is_string_dtype():

pandas/tests/dtypes/test_dtypes.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -166,16 +166,18 @@ def test_is_dtype(self, dtype):
166166
assert not CategoricalDtype.is_dtype(np.float64)
167167

168168
def test_basic(self, dtype):
169-
assert is_categorical_dtype(dtype)
169+
msg = "is_categorical_dtype is deprecated"
170+
with tm.assert_produces_warning(FutureWarning, match=msg):
171+
assert is_categorical_dtype(dtype)
170172

171-
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
173+
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])
172174

173-
s = Series(factor, name="A")
175+
s = Series(factor, name="A")
174176

175-
# dtypes
176-
assert is_categorical_dtype(s.dtype)
177-
assert is_categorical_dtype(s)
178-
assert not is_categorical_dtype(np.dtype("float64"))
177+
# dtypes
178+
assert is_categorical_dtype(s.dtype)
179+
assert is_categorical_dtype(s)
180+
assert not is_categorical_dtype(np.dtype("float64"))
179181

180182
def test_tuple_categories(self):
181183
categories = [(1, "a"), (2, "b"), (3, "c")]
@@ -1109,10 +1111,15 @@ def test_is_bool_dtype_sparse():
11091111
)
11101112
def test_is_dtype_no_warning(check):
11111113
data = pd.DataFrame({"A": [1, 2]})
1112-
with tm.assert_produces_warning(None):
1114+
1115+
warn = None
1116+
msg = "is_categorical_dtype is deprecated"
1117+
if check is is_categorical_dtype:
1118+
warn = FutureWarning
1119+
with tm.assert_produces_warning(warn, match=msg):
11131120
check(data)
11141121

1115-
with tm.assert_produces_warning(None):
1122+
with tm.assert_produces_warning(warn, match=msg):
11161123
check(data["A"])
11171124

11181125

pandas/tests/frame/indexing/test_setitem.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
from pandas.core.dtypes.base import _registry as ea_registry
99
from pandas.core.dtypes.common import (
10-
is_categorical_dtype,
1110
is_interval_dtype,
1211
is_object_dtype,
1312
)
@@ -484,9 +483,9 @@ def test_setitem_intervals(self):
484483
df["E"] = np.array(ser.values)
485484
df["F"] = ser.astype(object)
486485

487-
assert is_categorical_dtype(df["B"].dtype)
486+
assert isinstance(df["B"].dtype, CategoricalDtype)
488487
assert is_interval_dtype(df["B"].cat.categories)
489-
assert is_categorical_dtype(df["D"].dtype)
488+
assert isinstance(df["D"].dtype, CategoricalDtype)
490489
assert is_interval_dtype(df["D"].cat.categories)
491490

492491
# These go through the Series constructor and so get inferred back

pandas/tests/frame/test_reductions.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@
99
from pandas.compat import is_platform_windows
1010
import pandas.util._test_decorators as td
1111

12-
from pandas.core.dtypes.common import is_categorical_dtype
13-
1412
import pandas as pd
1513
from pandas import (
1614
Categorical,
15+
CategoricalDtype,
1716
DataFrame,
1817
Index,
1918
Series,
@@ -1280,7 +1279,7 @@ def test_any_all_np_func(self, func, data, expected):
12801279
# GH 19976
12811280
data = DataFrame(data)
12821281

1283-
if any(is_categorical_dtype(x) for x in data.dtypes):
1282+
if any(isinstance(x, CategoricalDtype) for x in data.dtypes):
12841283
with pytest.raises(
12851284
TypeError, match="dtype category does not support reduction"
12861285
):

0 commit comments

Comments
 (0)