From 0a3b2f70c6bd1cb306b5b96f46894b83dbb8274d Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 9 Jul 2019 00:21:20 +0100 Subject: [PATCH 1/2] TYPING: Partial typing of Categorical --- pandas/_typing.py | 3 +++ pandas/core/arrays/categorical.py | 26 +++++++++++++++----------- pandas/core/dtypes/dtypes.py | 13 ++++++++----- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 45c43fa958caa..a185513e37105 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -27,3 +27,6 @@ FrameOrSeries = TypeVar("FrameOrSeries", "Series", "DataFrame") Scalar = Union[str, int, float] Axis = Union[str, int] + +# TODO(GH26403): Replace with Optional[bool] or bool +OrderedType = Union[None, bool, object] diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6200cd14663f8..53ea26deddad0 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,5 +1,6 @@ from shutil import get_terminal_size import textwrap +from typing import Type, Union, cast from warnings import warn import numpy as np @@ -47,6 +48,7 @@ from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna +from pandas._typing import ArrayLike, Dtype, OrderedType from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms @@ -473,7 +475,7 @@ def categories(self, categories): self._dtype = new_dtype @property - def ordered(self): + def ordered(self) -> OrderedType: """ Whether the categories have an ordered relationship.
""" @@ -487,11 +489,11 @@ def dtype(self) -> CategoricalDtype: return self._dtype @property - def _ndarray_values(self): + def _ndarray_values(self) -> np.ndarray: return self.codes @property - def _constructor(self): + def _constructor(self) -> Type["Categorical"]: return Categorical @classmethod @@ -502,7 +504,7 @@ def _formatter(self, boxed=False): # Defer to CategoricalFormatter's formatter. return None - def copy(self): + def copy(self) -> "Categorical": """ Copy constructor. """ @@ -510,7 +512,7 @@ def copy(self): values=self._codes.copy(), dtype=self.dtype, fastpath=True ) - def astype(self, dtype, copy=True): + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: """ Coerce this type to another dtype @@ -523,6 +525,8 @@ def astype(self, dtype, copy=True): object is returned. """ if is_categorical_dtype(dtype): + dtype = cast(Union[str, CategoricalDtype], dtype) + # GH 10696/18593 dtype = self.dtype.update_dtype(dtype) self = self.copy() if copy else self @@ -532,27 +536,27 @@ def astype(self, dtype, copy=True): return np.array(self, dtype=dtype, copy=copy) @cache_readonly - def ndim(self): + def ndim(self) -> int: """ Number of dimensions of the Categorical """ return self._codes.ndim @cache_readonly - def size(self): + def size(self) -> int: """ return the len of myself """ return len(self) @cache_readonly - def itemsize(self): + def itemsize(self) -> int: """ return the size of a single category """ return self.categories.itemsize - def tolist(self): + def tolist(self) -> list: """ Return a list of the values. 
@@ -565,7 +569,7 @@ def tolist(self): to_list = tolist @property - def base(self): + def base(self) -> None: """ compat, we are always our own object """ @@ -773,7 +777,7 @@ def _set_categories(self, categories, fastpath=False): self._dtype = new_dtype - def _set_dtype(self, dtype): + def _set_dtype(self, dtype: CategoricalDtype) -> "Categorical": """ Internal method for directly updating the CategoricalDtype diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index bba551bd30a2d..0ef234a22a9a0 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,6 +1,6 @@ """ define extension dtypes """ import re -from typing import Any, Dict, List, Optional, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast import warnings import numpy as np @@ -11,6 +11,8 @@ from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass +from pandas._typing import OrderedType + from .base import ExtensionDtype from .inference import is_bool, is_list_like @@ -20,9 +22,6 @@ # CategoricalDtype constructor to detect when ordered=None is explicitly passed ordered_sentinel = object() # type: object -# TODO(GH26403): Replace with Optional[bool] or bool -OrderedType = Union[None, bool, object] - def register_extension_dtype(cls: Type[ExtensionDtype],) -> Type[ExtensionDtype]: """ @@ -529,7 +528,9 @@ def validate_categories(categories, fastpath: bool = False): return categories - def update_dtype(self, dtype: "CategoricalDtype") -> "CategoricalDtype": + def update_dtype( + self, dtype: Union[str_type, "CategoricalDtype"] + ) -> "CategoricalDtype": """ Returns a CategoricalDtype with categories and ordered taken from dtype if specified, otherwise falling back to self if unspecified @@ -552,6 +553,8 @@ def update_dtype(self, dtype: "CategoricalDtype") -> "CategoricalDtype": ).format(dtype=dtype) raise ValueError(msg) + dtype = cast(CategoricalDtype, dtype) + # dtype is CDT: keep current 
categories/ordered if None new_categories = dtype.categories if new_categories is None: From b76f27ed5edcb5f2f04004b98e936f3d86fb46bd Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 24 Jul 2019 23:53:58 +0100 Subject: [PATCH 2/2] Changes --- pandas/_typing.py | 6 ++---- pandas/core/arrays/categorical.py | 6 ++---- pandas/core/dtypes/dtypes.py | 25 ++++++++++++++----------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index a185513e37105..837a7a89e0b83 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import IO, TYPE_CHECKING, AnyStr, TypeVar, Union +from typing import IO, TYPE_CHECKING, AnyStr, Optional, TypeVar, Union import numpy as np @@ -27,6 +27,4 @@ FrameOrSeries = TypeVar("FrameOrSeries", "Series", "DataFrame") Scalar = Union[str, int, float] Axis = Union[str, int] - -# TODO(GH26403): Replace with Optional[bool] or bool -OrderedType = Union[None, bool, object] +Ordered = Optional[bool] diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 53ea26deddad0..50ea8d2f4d7a7 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -48,7 +48,7 @@ from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna, notna -from pandas._typing import ArrayLike, Dtype, OrderedType +from pandas._typing import ArrayLike, Dtype, Ordered from pandas.core import ops from pandas.core.accessor import PandasDelegate, delegate_names import pandas.core.algorithms as algorithms @@ -475,7 +475,7 @@ def categories(self, categories): self._dtype = new_dtype @property - def ordered(self) -> OrderedType: + def ordered(self) -> Ordered: """ Whether the categories have an ordered relationship. 
""" @@ -716,8 +716,6 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): return cls(codes, dtype=dtype, fastpath=True) - _codes = None - def _get_codes(self): """ Get the codes. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 0ef234a22a9a0..ee1866e60644b 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -11,7 +11,7 @@ from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass -from pandas._typing import OrderedType +from pandas._typing import Ordered from .base import ExtensionDtype from .inference import is_bool, is_list_like @@ -221,7 +221,11 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): _metadata = ("categories", "ordered", "_ordered_from_sentinel") _cache = {} # type: Dict[str_type, PandasExtensionDtype] - def __init__(self, categories=None, ordered: OrderedType = ordered_sentinel): + def __init__( + self, categories=None, ordered: Union[Ordered, object] = ordered_sentinel + ): + # TODO(GH26403): Set type of ordered to Ordered + ordered = cast(Ordered, ordered) self._finalize(categories, ordered, fastpath=False) @classmethod @@ -234,7 +238,7 @@ def _from_fastpath( @classmethod def _from_categorical_dtype( - cls, dtype: "CategoricalDtype", categories=None, ordered: OrderedType = None + cls, dtype: "CategoricalDtype", categories=None, ordered: Ordered = None ) -> "CategoricalDtype": if categories is ordered is None: return dtype @@ -335,9 +339,7 @@ def _from_values_or_dtype( return dtype - def _finalize( - self, categories, ordered: OrderedType, fastpath: bool = False - ) -> None: + def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None: if ordered is not None and ordered is not ordered_sentinel: self.validate_ordered(ordered) @@ -422,7 +424,7 @@ def __repr__(self): return tpl.format(data, self._ordered) @staticmethod - def _hash_categories(categories, ordered: OrderedType = True) -> int: + def 
_hash_categories(categories, ordered: Ordered = True) -> int: from pandas.core.util.hashing import ( hash_array, _combine_hash_arrays, @@ -474,7 +476,7 @@ def construct_array_type(cls): return Categorical @staticmethod - def validate_ordered(ordered: OrderedType) -> None: + def validate_ordered(ordered: Ordered) -> None: """ Validates that we have a valid ordered parameter. If it is not a boolean, a TypeError will be raised. @@ -552,8 +554,9 @@ def update_dtype( "got {dtype!r}" ).format(dtype=dtype) raise ValueError(msg) - - dtype = cast(CategoricalDtype, dtype) + else: + # from here on, dtype is a CategoricalDtype + dtype = cast(CategoricalDtype, dtype) # dtype is CDT: keep current categories/ordered if None new_categories = dtype.categories @@ -586,7 +589,7 @@ def categories(self): return self._categories @property - def ordered(self) -> OrderedType: + def ordered(self) -> Ordered: """ Whether the categories have an ordered relationship. """