Skip to content

Commit 3ab8d2f

Browse files
jschendel authored and proost committed
DEPR: Change default value for CategoricalDtype.ordered from None to False (pandas-dev#29955)
1 parent c459c43 commit 3ab8d2f

File tree

11 files changed

+67
-168
lines changed

11 files changed

+67
-168
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
622622
- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`)
623623
- Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`)
624624
- Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`)
625+
- Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`)
625626
- :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`)
626627
-
627628

pandas/core/arrays/categorical.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def __init__(
328328
# sanitize input
329329
if is_categorical_dtype(values):
330330
if dtype.categories is None:
331-
dtype = CategoricalDtype(values.categories, dtype._ordered)
331+
dtype = CategoricalDtype(values.categories, dtype.ordered)
332332
elif not isinstance(values, (ABCIndexClass, ABCSeries)):
333333
# sanitize_array coerces np.nan to a string under certain versions
334334
# of numpy
@@ -351,7 +351,7 @@ def __init__(
351351
codes, categories = factorize(values, sort=True)
352352
except TypeError:
353353
codes, categories = factorize(values, sort=False)
354-
if dtype._ordered:
354+
if dtype.ordered:
355355
# raise, as we don't have a sortable data structure and so
356356
# the user should give us one by specifying categories
357357
raise TypeError(
@@ -367,7 +367,7 @@ def __init__(
367367
)
368368

369369
# we're inferring from values
370-
dtype = CategoricalDtype(categories, dtype._ordered)
370+
dtype = CategoricalDtype(categories, dtype.ordered)
371371

372372
elif is_categorical_dtype(values):
373373
old_codes = (
@@ -437,7 +437,7 @@ def ordered(self) -> Ordered:
437437
"""
438438
Whether the categories have an ordered relationship.
439439
"""
440-
return self.dtype._ordered
440+
return self.dtype.ordered
441441

442442
@property
443443
def dtype(self) -> CategoricalDtype:
@@ -833,7 +833,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
833833
"""
834834
inplace = validate_bool_kwarg(inplace, "inplace")
835835
if ordered is None:
836-
ordered = self.dtype._ordered
836+
ordered = self.dtype.ordered
837837
new_dtype = CategoricalDtype(new_categories, ordered=ordered)
838838

839839
cat = self if inplace else self.copy()

pandas/core/construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ def _try_cast(
558558
# that Categorical is the only array type for 'category'.
559559
dtype = cast(CategoricalDtype, dtype)
560560
subarr = dtype.construct_array_type()(
561-
arr, dtype.categories, ordered=dtype._ordered
561+
arr, dtype.categories, ordered=dtype.ordered
562562
)
563563
elif is_extension_array_dtype(dtype):
564564
# create an extension array from its dtype

pandas/core/dtypes/dtypes.py

+49-52
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
""" define extension dtypes """
22
import re
33
from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast
4-
import warnings
54

65
import numpy as np
76
import pytz
@@ -18,10 +17,6 @@
1817

1918
str_type = str
2019

21-
# GH26403: sentinel value used for the default value of ordered in the
22-
# CategoricalDtype constructor to detect when ordered=None is explicitly passed
23-
ordered_sentinel: object = object()
24-
2520

2621
def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]:
2722
"""
@@ -179,7 +174,11 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
179174
----------
180175
categories : sequence, optional
181176
Must be unique, and must not contain any nulls.
182-
ordered : bool, default False
177+
ordered : bool or None, default False
178+
Whether or not this categorical is treated as an ordered categorical.
179+
None can be used to maintain the ordered value of existing categoricals when
180+
used in operations that combine categoricals, e.g. astype, and will resolve to
181+
False if there is no existing ordered to maintain.
183182
184183
Attributes
185184
----------
@@ -218,14 +217,10 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
218217
kind: str_type = "O"
219218
str = "|O08"
220219
base = np.dtype("O")
221-
_metadata = ("categories", "ordered", "_ordered_from_sentinel")
220+
_metadata = ("categories", "ordered")
222221
_cache: Dict[str_type, PandasExtensionDtype] = {}
223222

224-
def __init__(
225-
self, categories=None, ordered: Union[Ordered, object] = ordered_sentinel
226-
):
227-
# TODO(GH26403): Set type of ordered to Ordered
228-
ordered = cast(Ordered, ordered)
223+
def __init__(self, categories=None, ordered: Ordered = False):
229224
self._finalize(categories, ordered, fastpath=False)
230225

231226
@classmethod
@@ -338,36 +333,63 @@ def _from_values_or_dtype(
338333

339334
return dtype
340335

336+
@classmethod
337+
def construct_from_string(cls, string: str_type) -> "CategoricalDtype":
338+
"""
339+
Construct a CategoricalDtype from a string.
340+
341+
Parameters
342+
----------
343+
string : str
344+
Must be the string "category" in order to be successfully constructed.
345+
346+
Returns
347+
-------
348+
CategoricalDtype
349+
Instance of the dtype.
350+
351+
Raises
352+
------
353+
TypeError
354+
If a CategoricalDtype cannot be constructed from the input.
355+
"""
356+
if not isinstance(string, str):
357+
raise TypeError(f"Expects a string, got {type(string)}")
358+
if string != cls.name:
359+
raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'")
360+
361+
# need ordered=None to ensure that operations specifying dtype="category" don't
362+
# override the ordered value for existing categoricals
363+
return cls(ordered=None)
364+
341365
def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None:
342366

343-
if ordered is not None and ordered is not ordered_sentinel:
367+
if ordered is not None:
344368
self.validate_ordered(ordered)
345369

346370
if categories is not None:
347371
categories = self.validate_categories(categories, fastpath=fastpath)
348372

349373
self._categories = categories
350-
self._ordered = ordered if ordered is not ordered_sentinel else None
351-
self._ordered_from_sentinel = ordered is ordered_sentinel
374+
self._ordered = ordered
352375

353376
def __setstate__(self, state: MutableMapping[str_type, Any]) -> None:
354377
# for pickle compat. __getstate__ is defined in the
355378
# PandasExtensionDtype superclass and uses the public properties to
356379
# pickle -> need to set the settable private ones here (see GH26067)
357380
self._categories = state.pop("categories", None)
358381
self._ordered = state.pop("ordered", False)
359-
self._ordered_from_sentinel = state.pop("_ordered_from_sentinel", False)
360382

361383
def __hash__(self) -> int:
362384
# _hash_categories returns a uint64, so use the negative
363385
# space for when we have unknown categories to avoid a conflict
364386
if self.categories is None:
365-
if self._ordered:
387+
if self.ordered:
366388
return -1
367389
else:
368390
return -2
369391
# We *do* want to include the real self.ordered here
370-
return int(self._hash_categories(self.categories, self._ordered))
392+
return int(self._hash_categories(self.categories, self.ordered))
371393

372394
def __eq__(self, other: Any) -> bool:
373395
"""
@@ -386,7 +408,7 @@ def __eq__(self, other: Any) -> bool:
386408
return other == self.name
387409
elif other is self:
388410
return True
389-
elif not (hasattr(other, "_ordered") and hasattr(other, "categories")):
411+
elif not (hasattr(other, "ordered") and hasattr(other, "categories")):
390412
return False
391413
elif self.categories is None or other.categories is None:
392414
# We're forced into a suboptimal corner thanks to math and
@@ -395,10 +417,10 @@ def __eq__(self, other: Any) -> bool:
395417
# CDT(., .) = CDT(None, False) and *all*
396418
# CDT(., .) = CDT(None, True).
397419
return True
398-
elif self._ordered or other._ordered:
420+
elif self.ordered or other.ordered:
399421
# At least one has ordered=True; equal if both have ordered=True
400422
# and the same values for categories in the same order.
401-
return (self._ordered == other._ordered) and self.categories.equals(
423+
return (self.ordered == other.ordered) and self.categories.equals(
402424
other.categories
403425
)
404426
else:
@@ -420,7 +442,7 @@ def __repr__(self) -> str_type:
420442
data = "None, "
421443
else:
422444
data = self.categories._format_data(name=type(self).__name__)
423-
return tpl.format(data=data, ordered=self._ordered)
445+
return tpl.format(data=data, ordered=self.ordered)
424446

425447
@staticmethod
426448
def _hash_categories(categories, ordered: Ordered = True) -> int:
@@ -557,26 +579,11 @@ def update_dtype(
557579
# from here on, dtype is a CategoricalDtype
558580
dtype = cast(CategoricalDtype, dtype)
559581

560-
# dtype is CDT: keep current categories/ordered if None
561-
new_categories = dtype.categories
562-
if new_categories is None:
563-
new_categories = self.categories
564-
565-
new_ordered = dtype._ordered
566-
new_ordered_from_sentinel = dtype._ordered_from_sentinel
567-
if new_ordered is None:
568-
# maintain existing ordered if new dtype has ordered=None
569-
new_ordered = self._ordered
570-
if self._ordered and new_ordered_from_sentinel:
571-
# only warn if we'd actually change the existing behavior
572-
msg = (
573-
"Constructing a CategoricalDtype without specifying "
574-
"`ordered` will default to `ordered=False` in a future "
575-
"version, which will cause the resulting categorical's "
576-
"`ordered` attribute to change to False; `ordered=True` "
577-
"must be explicitly passed in order to be retained"
578-
)
579-
warnings.warn(msg, FutureWarning, stacklevel=3)
582+
# update categories/ordered unless they've been explicitly passed as None
583+
new_categories = (
584+
dtype.categories if dtype.categories is not None else self.categories
585+
)
586+
new_ordered = dtype.ordered if dtype.ordered is not None else self.ordered
580587

581588
return CategoricalDtype(new_categories, new_ordered)
582589

@@ -592,16 +599,6 @@ def ordered(self) -> Ordered:
592599
"""
593600
Whether the categories have an ordered relationship.
594601
"""
595-
# TODO: remove if block when ordered=None as default is deprecated
596-
if self._ordered_from_sentinel and self._ordered is None:
597-
# warn when accessing ordered if ordered=None and None was not
598-
# explicitly passed to the constructor
599-
msg = (
600-
"Constructing a CategoricalDtype without specifying "
601-
"`ordered` will default to `ordered=False` in a future "
602-
"version; `ordered=None` must be explicitly passed."
603-
)
604-
warnings.warn(msg, FutureWarning, stacklevel=2)
605602
return self._ordered
606603

607604
@property

pandas/core/series.py

-10
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
_is_unorderable_exception,
2121
ensure_platform_int,
2222
is_bool,
23-
is_categorical,
2423
is_categorical_dtype,
2524
is_datetime64_dtype,
2625
is_dict_like,
@@ -213,15 +212,6 @@ def __init__(
213212
if data is None:
214213
data = {}
215214
if dtype is not None:
216-
# GH 26336: explicitly handle 'category' to avoid warning
217-
# TODO: Remove after CategoricalDtype defaults to ordered=False
218-
if (
219-
isinstance(dtype, str)
220-
and dtype == "category"
221-
and is_categorical(data)
222-
):
223-
dtype = data.dtype
224-
225215
dtype = self._validate_dtype(dtype)
226216

227217
if isinstance(data, MultiIndex):

pandas/tests/arrays/categorical/test_dtypes.py

-8
Original file line numberDiff line numberDiff line change
@@ -161,14 +161,6 @@ def test_astype_category(self, dtype_ordered, cat_ordered):
161161
expected = cat
162162
tm.assert_categorical_equal(result, expected)
163163

164-
def test_astype_category_ordered_none_deprecated(self):
165-
# GH 26336
166-
cdt1 = CategoricalDtype(categories=list("cdab"), ordered=True)
167-
cdt2 = CategoricalDtype(categories=list("cedafb"))
168-
cat = Categorical(list("abcdaba"), dtype=cdt1)
169-
with tm.assert_produces_warning(FutureWarning):
170-
cat.astype(cdt2)
171-
172164
def test_iter_python_types(self):
173165
# GH-19909
174166
cat = Categorical([1, 2])

pandas/tests/dtypes/test_dtypes.py

+8-42
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
DatetimeTZDtype,
2323
IntervalDtype,
2424
PeriodDtype,
25-
ordered_sentinel,
2625
registry,
2726
)
2827

@@ -65,8 +64,7 @@ def test_pickle(self):
6564

6665
class TestCategoricalDtype(Base):
6766
def create(self):
68-
# TODO(GH 26403): Remove when default ordered becomes False
69-
return CategoricalDtype(ordered=None)
67+
return CategoricalDtype()
7068

7169
def test_pickle(self):
7270
# make sure our cache is NOT pickled
@@ -721,8 +719,7 @@ def test_unordered_same(self, ordered):
721719
def test_categories(self):
722720
result = CategoricalDtype(["a", "b", "c"])
723721
tm.assert_index_equal(result.categories, pd.Index(["a", "b", "c"]))
724-
with tm.assert_produces_warning(FutureWarning):
725-
assert result.ordered is None
722+
assert result.ordered is False
726723

727724
def test_equal_but_different(self, ordered_fixture):
728725
c1 = CategoricalDtype([1, 2, 3])
@@ -847,25 +844,15 @@ def test_categorical_categories(self):
847844
@pytest.mark.parametrize(
848845
"new_categories", [list("abc"), list("cba"), list("wxyz"), None]
849846
)
850-
@pytest.mark.parametrize("new_ordered", [True, False, None, ordered_sentinel])
847+
@pytest.mark.parametrize("new_ordered", [True, False, None])
851848
def test_update_dtype(self, ordered_fixture, new_categories, new_ordered):
852-
dtype = CategoricalDtype(list("abc"), ordered_fixture)
849+
original_categories = list("abc")
850+
dtype = CategoricalDtype(original_categories, ordered_fixture)
853851
new_dtype = CategoricalDtype(new_categories, new_ordered)
854852

855-
expected_categories = new_dtype.categories
856-
if expected_categories is None:
857-
expected_categories = dtype.categories
858-
859-
expected_ordered = new_ordered
860-
if new_ordered is ordered_sentinel or new_ordered is None:
861-
expected_ordered = dtype.ordered
862-
863-
# GH 26336
864-
if new_ordered is ordered_sentinel and ordered_fixture is True:
865-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
866-
result = dtype.update_dtype(new_dtype)
867-
else:
868-
result = dtype.update_dtype(new_dtype)
853+
result = dtype.update_dtype(new_dtype)
854+
expected_categories = pd.Index(new_categories or original_categories)
855+
expected_ordered = new_ordered if new_ordered is not None else dtype.ordered
869856

870857
tm.assert_index_equal(result.categories, expected_categories)
871858
assert result.ordered is expected_ordered
@@ -885,27 +872,6 @@ def test_update_dtype_errors(self, bad_dtype):
885872
with pytest.raises(ValueError, match=msg):
886873
dtype.update_dtype(bad_dtype)
887874

888-
@pytest.mark.parametrize("ordered", [ordered_sentinel, None, True, False])
889-
def test_ordered_none_default_deprecated(self, ordered):
890-
# GH 26403: CDT.ordered only warns if ordered is not explicitly passed
891-
dtype = CategoricalDtype(list("abc"), ordered=ordered)
892-
warning = FutureWarning if ordered is ordered_sentinel else None
893-
with tm.assert_produces_warning(warning):
894-
dtype.ordered
895-
896-
@pytest.mark.parametrize("ordered", [True, False, None, ordered_sentinel])
897-
def test_pickle_ordered_from_sentinel(self, ordered):
898-
# GH 27295: can remove test when _ordered_from_sentinel is removed (GH 26403)
899-
dtype = CategoricalDtype(categories=list("abc"), ordered=ordered)
900-
901-
warning = FutureWarning if ordered is ordered_sentinel else None
902-
with tm.assert_produces_warning(warning, check_stacklevel=False):
903-
dtype_from_pickle = tm.round_trip_pickle(dtype)
904-
905-
result = dtype_from_pickle._ordered_from_sentinel
906-
expected = ordered is ordered_sentinel
907-
assert result is expected
908-
909875

910876
@pytest.mark.parametrize(
911877
"dtype", [CategoricalDtype, IntervalDtype, DatetimeTZDtype, PeriodDtype]

0 commit comments

Comments
 (0)