-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
DEPR: Change default value for CategoricalDtype.ordered from None to False #29955
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
42eb1a4
4624065
462c383
5cae1ab
407375f
3f633bd
a3c4e63
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,6 @@ | ||
""" define extension dtypes """ | ||
import re | ||
from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast | ||
import warnings | ||
|
||
import numpy as np | ||
import pytz | ||
|
@@ -18,10 +17,6 @@ | |
|
||
str_type = str | ||
|
||
# GH26403: sentinel value used for the default value of ordered in the | ||
# CategoricalDtype constructor to detect when ordered=None is explicitly passed | ||
ordered_sentinel: object = object() | ||
|
||
|
||
def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: | ||
""" | ||
|
@@ -218,14 +213,10 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): | |
kind: str_type = "O" | ||
str = "|O08" | ||
base = np.dtype("O") | ||
_metadata = ("categories", "ordered", "_ordered_from_sentinel") | ||
_metadata = ("categories", "ordered") | ||
_cache: Dict[str_type, PandasExtensionDtype] = {} | ||
|
||
def __init__( | ||
self, categories=None, ordered: Union[Ordered, object] = ordered_sentinel | ||
): | ||
# TODO(GH26403): Set type of ordered to Ordered | ||
ordered = cast(Ordered, ordered) | ||
def __init__(self, categories=None, ordered: Ordered = False): | ||
self._finalize(categories, ordered, fastpath=False) | ||
|
||
@classmethod | ||
|
@@ -338,36 +329,63 @@ def _from_values_or_dtype( | |
|
||
return dtype | ||
|
||
@classmethod | ||
def construct_from_string(cls, string: str_type) -> "CategoricalDtype": | ||
""" | ||
Construct a CategoricalDtype from a string. | ||
|
||
Parameters | ||
---------- | ||
string : str | ||
Must be the string "category" in order to be successfully constructed. | ||
|
||
Returns | ||
------- | ||
CategoricalDtype | ||
Instance of the dtype. | ||
|
||
Raises | ||
------ | ||
TypeError | ||
If a CategoricalDtype cannot be constructed from the input. | ||
""" | ||
if not isinstance(string, str): | ||
raise TypeError(f"Expects a string, got {type(string)}") | ||
if string != cls.name: | ||
raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'") | ||
|
||
# need ordered=None to ensure that operations specifying dtype="category" don't | ||
# override the ordered value for existing categoricals | ||
return cls(ordered=None) | ||
|
||
def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None: | ||
|
||
if ordered is not None and ordered is not ordered_sentinel: | ||
if ordered is not None: | ||
self.validate_ordered(ordered) | ||
|
||
if categories is not None: | ||
categories = self.validate_categories(categories, fastpath=fastpath) | ||
|
||
self._categories = categories | ||
self._ordered = ordered if ordered is not ordered_sentinel else None | ||
self._ordered_from_sentinel = ordered is ordered_sentinel | ||
self._ordered = ordered | ||
|
||
def __setstate__(self, state: MutableMapping[str_type, Any]) -> None: | ||
# for pickle compat. __getstate__ is defined in the | ||
# PandasExtensionDtype superclass and uses the public properties to | ||
# pickle -> need to set the settable private ones here (see GH26067) | ||
self._categories = state.pop("categories", None) | ||
self._ordered = state.pop("ordered", False) | ||
self._ordered_from_sentinel = state.pop("_ordered_from_sentinel", False) | ||
|
||
def __hash__(self) -> int: | ||
# _hash_categories returns a uint64, so use the negative | ||
# space for when we have unknown categories to avoid a conflict | ||
if self.categories is None: | ||
if self._ordered: | ||
if self.ordered: | ||
return -1 | ||
else: | ||
return -2 | ||
# We *do* want to include the real self.ordered here | ||
return int(self._hash_categories(self.categories, self._ordered)) | ||
return int(self._hash_categories(self.categories, self.ordered)) | ||
|
||
def __eq__(self, other: Any) -> bool: | ||
""" | ||
|
@@ -386,7 +404,7 @@ def __eq__(self, other: Any) -> bool: | |
return other == self.name | ||
elif other is self: | ||
return True | ||
elif not (hasattr(other, "_ordered") and hasattr(other, "categories")): | ||
elif not (hasattr(other, "ordered") and hasattr(other, "categories")): | ||
return False | ||
elif self.categories is None or other.categories is None: | ||
# We're forced into a suboptimal corner thanks to math and | ||
|
@@ -395,10 +413,10 @@ def __eq__(self, other: Any) -> bool: | |
# CDT(., .) = CDT(None, False) and *all* | ||
# CDT(., .) = CDT(None, True). | ||
return True | ||
elif self._ordered or other._ordered: | ||
elif self.ordered or other.ordered: | ||
# At least one has ordered=True; equal if both have ordered=True | ||
# and the same values for categories in the same order. | ||
return (self._ordered == other._ordered) and self.categories.equals( | ||
return (self.ordered == other.ordered) and self.categories.equals( | ||
other.categories | ||
) | ||
else: | ||
|
@@ -420,7 +438,7 @@ def __repr__(self) -> str_type: | |
data = "None, " | ||
else: | ||
data = self.categories._format_data(name=type(self).__name__) | ||
return tpl.format(data=data, ordered=self._ordered) | ||
return tpl.format(data=data, ordered=self.ordered) | ||
|
||
@staticmethod | ||
def _hash_categories(categories, ordered: Ordered = True) -> int: | ||
|
@@ -562,21 +580,9 @@ def update_dtype( | |
if new_categories is None: | ||
new_categories = self.categories | ||
|
||
new_ordered = dtype._ordered | ||
new_ordered_from_sentinel = dtype._ordered_from_sentinel | ||
new_ordered = dtype.ordered | ||
if new_ordered is None: | ||
# maintain existing ordered if new dtype has ordered=None | ||
new_ordered = self._ordered | ||
if self._ordered and new_ordered_from_sentinel: | ||
# only warn if we'd actually change the existing behavior | ||
msg = ( | ||
"Constructing a CategoricalDtype without specifying " | ||
"`ordered` will default to `ordered=False` in a future " | ||
"version, which will cause the resulting categorical's " | ||
"`ordered` attribute to change to False; `ordered=True` " | ||
"must be explicitly passed in order to be retained" | ||
) | ||
warnings.warn(msg, FutureWarning, stacklevel=3) | ||
new_ordered = self.ordered | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a comment here why ordered can be None? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's a comment a little further up that actually addresses this (above the |
||
|
||
return CategoricalDtype(new_categories, new_ordered) | ||
|
||
|
@@ -592,16 +598,6 @@ def ordered(self) -> Ordered: | |
""" | ||
Whether the categories have an ordered relationship. | ||
""" | ||
# TODO: remove if block when ordered=None as default is deprecated | ||
if self._ordered_from_sentinel and self._ordered is None: | ||
# warn when accessing ordered if ordered=None and None was not | ||
# explicitly passed to the constructor | ||
msg = ( | ||
"Constructing a CategoricalDtype without specifying " | ||
"`ordered` will default to `ordered=False` in a future " | ||
"version; `ordered=None` must be explicitly passed." | ||
) | ||
warnings.warn(msg, FutureWarning, stacklevel=2) | ||
return self._ordered | ||
|
||
@property | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(that's the public path as in the api docs: https://dev.pandas.io/docs/reference/api/pandas.CategoricalDtype.html)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, updated to
:class:`CategoricalDtype`