Skip to content

Commit e5b0032

Browse files
authored
DEPR: Categorical fastpath (#52472)
* DEPR: Categorical fastpath * mypy fixup * mypy fixup
1 parent 79e3ee6 commit e5b0032

File tree

12 files changed

+56
-15
lines changed

12 files changed

+56
-15
lines changed

asv_bench/benchmarks/categoricals.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ def time_regular(self):
4242
pd.Categorical(self.values, self.categories)
4343

4444
def time_fastpath(self):
45-
pd.Categorical(self.codes, self.cat_idx, fastpath=True)
45+
dtype = pd.CategoricalDtype(categories=self.cat_idx)
46+
pd.Categorical._simple_new(self.codes, dtype)
4647

4748
def time_datetimes(self):
4849
pd.Categorical(self.datetimes)

doc/source/whatsnew/v2.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,12 @@ Deprecations
167167
- Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
168168
- Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`)
169169
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`.GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
170+
- Deprecated the 'fastpath' keyword in the :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`)
170171
- Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`)
171172
- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
172173
- Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`)
173174
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
174175
- Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`)
175-
-
176176

177177

178178
.. ---------------------------------------------------------------------------

pandas/core/arrays/categorical.py

+27-4
Original file line numberDiff line numberDiff line change
@@ -355,15 +355,38 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi
355355

356356
_dtype: CategoricalDtype
357357

358+
@classmethod
359+
# error: Argument 2 of "_simple_new" is incompatible with supertype
360+
# "NDArrayBacked"; supertype defines the argument type as
361+
# "Union[dtype[Any], ExtensionDtype]"
362+
def _simple_new( # type: ignore[override]
363+
cls, codes: np.ndarray, dtype: CategoricalDtype
364+
) -> Self:
365+
# NB: This is not _quite_ as simple as the "usual" _simple_new
366+
codes = coerce_indexer_dtype(codes, dtype.categories)
367+
dtype = CategoricalDtype(ordered=False).update_dtype(dtype)
368+
return super()._simple_new(codes, dtype)
369+
358370
def __init__(
359371
self,
360372
values,
361373
categories=None,
362374
ordered=None,
363375
dtype: Dtype | None = None,
364-
fastpath: bool = False,
376+
fastpath: bool | lib.NoDefault = lib.no_default,
365377
copy: bool = True,
366378
) -> None:
379+
if fastpath is not lib.no_default:
380+
# GH#20110
381+
warnings.warn(
382+
"The 'fastpath' keyword in Categorical is deprecated and will "
383+
"be removed in a future version. Use Categorical.from_codes instead",
384+
FutureWarning,
385+
stacklevel=find_stack_level(),
386+
)
387+
else:
388+
fastpath = False
389+
367390
dtype = CategoricalDtype._from_values_or_dtype(
368391
values, categories, ordered, dtype
369392
)
@@ -626,7 +649,7 @@ def _from_inferred_categories(
626649
dtype = CategoricalDtype(cats, ordered=False)
627650
codes = inferred_codes
628651

629-
return cls(codes, dtype=dtype, fastpath=True)
652+
return cls._simple_new(codes, dtype=dtype)
630653

631654
@classmethod
632655
def from_codes(
@@ -693,7 +716,7 @@ def from_codes(
693716
if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1):
694717
raise ValueError("codes need to be between -1 and len(categories)-1")
695718

696-
return cls(codes, dtype=dtype, fastpath=True)
719+
return cls._simple_new(codes, dtype=dtype)
697720

698721
# ------------------------------------------------------------------
699722
# Categories/Codes/Ordered
@@ -805,7 +828,7 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
805828
a (valid) instance of `CategoricalDtype`.
806829
"""
807830
codes = recode_for_categories(self.codes, self.categories, dtype.categories)
808-
return type(self)(codes, dtype=dtype, fastpath=True)
831+
return type(self)._simple_new(codes, dtype=dtype)
809832

810833
def set_ordered(self, value: bool) -> Self:
811834
"""

pandas/core/dtypes/concat.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
)
2121
from pandas.core.dtypes.common import is_dtype_equal
2222
from pandas.core.dtypes.dtypes import (
23+
CategoricalDtype,
2324
DatetimeTZDtype,
2425
ExtensionDtype,
2526
)
@@ -323,7 +324,8 @@ def _maybe_unwrap(x):
323324
if ignore_order:
324325
ordered = False
325326

326-
return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)
327+
dtype = CategoricalDtype(categories=categories, ordered=ordered)
328+
return Categorical._simple_new(new_codes, dtype=dtype)
327329

328330

329331
def _concatenate_2d(to_concat: Sequence[np.ndarray], axis: AxisInt) -> np.ndarray:

pandas/core/dtypes/dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def _from_values_or_dtype(
260260
CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object)
261261
>>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True)
262262
>>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False)
263-
>>> c = pd.Categorical([0, 1], dtype=dtype1, fastpath=True)
263+
>>> c = pd.Categorical([0, 1], dtype=dtype1)
264264
>>> pd.CategoricalDtype._from_values_or_dtype(
265265
... c, ['x', 'y'], ordered=True, dtype=dtype2
266266
... )

pandas/core/groupby/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def recode_for_groupby(
6363
# return a new categorical that maps our new codes
6464
# and categories
6565
dtype = CategoricalDtype(categories, ordered=c.ordered)
66-
return Categorical(codes, dtype=dtype, fastpath=True), c
66+
return Categorical._simple_new(codes, dtype=dtype), c
6767

6868
# Already sorted according to c.categories; all is fine
6969
if sort:

pandas/core/util/hashing.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from pandas._libs.hashing import hash_object_array
1717

1818
from pandas.core.dtypes.common import is_list_like
19+
from pandas.core.dtypes.dtypes import CategoricalDtype
1920
from pandas.core.dtypes.generic import (
2021
ABCDataFrame,
2122
ABCExtensionArray,
@@ -203,7 +204,10 @@ def hash_tuples(
203204

204205
# create a list-of-Categoricals
205206
cat_vals = [
206-
Categorical(mi.codes[level], mi.levels[level], ordered=False, fastpath=True)
207+
Categorical._simple_new(
208+
mi.codes[level],
209+
CategoricalDtype(categories=mi.levels[level], ordered=False),
210+
)
207211
for level in range(mi.nlevels)
208212
]
209213

@@ -296,7 +300,8 @@ def _hash_ndarray(
296300
)
297301

298302
codes, categories = factorize(vals, sort=False)
299-
cat = Categorical(codes, Index(categories), ordered=False, fastpath=True)
303+
dtype = CategoricalDtype(categories=Index(categories), ordered=False)
304+
cat = Categorical._simple_new(codes, dtype)
300305
return cat._hash_pandas_object(
301306
encoding=encoding, hash_key=hash_key, categorize=False
302307
)

pandas/tests/arrays/categorical/test_constructors.py

+7
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@
3232

3333

3434
class TestCategoricalConstructors:
35+
def test_fastpath_deprecated(self):
36+
codes = np.array([1, 2, 3])
37+
dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False)
38+
msg = "The 'fastpath' keyword in Categorical is deprecated"
39+
with tm.assert_produces_warning(FutureWarning, match=msg):
40+
Categorical(codes, dtype=dtype, fastpath=True)
41+
3542
def test_categorical_from_cat_and_dtype_str_preserve_ordered(self):
3643
# GH#49309 we should preserve orderedness in `res`
3744
cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True)

pandas/tests/arrays/categorical/test_missing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def test_na_flags_int_categories(self):
3131
labels = np.random.randint(0, 10, 20)
3232
labels[::5] = -1
3333

34-
cat = Categorical(labels, categories, fastpath=True)
34+
cat = Categorical(labels, categories)
3535
repr(cat)
3636

3737
tm.assert_numpy_array_equal(isna(cat), labels == -1)

pandas/tests/arrays/categorical/test_repr.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from pandas import (
44
Categorical,
5+
CategoricalDtype,
56
CategoricalIndex,
67
Series,
78
date_range,
@@ -24,7 +25,9 @@ def test_print(self, factor):
2425

2526
class TestCategoricalRepr:
2627
def test_big_print(self):
27-
factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True)
28+
codes = np.array([0, 1, 2, 0, 1, 2] * 100)
29+
dtype = CategoricalDtype(categories=["a", "b", "c"])
30+
factor = Categorical.from_codes(codes, dtype=dtype)
2831
expected = [
2932
"['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
3033
"Length: 600",

pandas/tests/dtypes/test_dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def test_constructor_invalid(self):
123123

124124
dtype1 = CategoricalDtype(["a", "b"], ordered=True)
125125
dtype2 = CategoricalDtype(["x", "y"], ordered=False)
126-
c = Categorical([0, 1], dtype=dtype1, fastpath=True)
126+
c = Categorical([0, 1], dtype=dtype1)
127127

128128
@pytest.mark.parametrize(
129129
"values, categories, ordered, dtype, expected",

pandas/tests/series/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ def test_constructor_map(self):
385385
tm.assert_series_equal(result, exp)
386386

387387
def test_constructor_categorical(self):
388-
cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"], fastpath=True)
388+
cat = Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"])
389389
res = Series(cat)
390390
tm.assert_categorical_equal(res.values, cat)
391391

0 commit comments

Comments
 (0)