Skip to content

TYPING: Partial typing of Categorical #27318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/_typing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import IO, TYPE_CHECKING, AnyStr, TypeVar, Union
from typing import IO, TYPE_CHECKING, AnyStr, Optional, TypeVar, Union

import numpy as np

Expand Down Expand Up @@ -27,3 +27,4 @@
FrameOrSeries = TypeVar("FrameOrSeries", "Series", "DataFrame")
Scalar = Union[str, int, float]
Axis = Union[str, int]
Ordered = Optional[bool]
28 changes: 15 additions & 13 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from shutil import get_terminal_size
import textwrap
from typing import Type, Union, cast
from warnings import warn

import numpy as np
Expand Down Expand Up @@ -47,6 +48,7 @@
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna

from pandas._typing import ArrayLike, Dtype, Ordered
from pandas.core import ops
from pandas.core.accessor import PandasDelegate, delegate_names
import pandas.core.algorithms as algorithms
Expand Down Expand Up @@ -473,7 +475,7 @@ def categories(self, categories):
self._dtype = new_dtype

@property
def ordered(self):
def ordered(self) -> Ordered:
"""
Whether the categories have an ordered relationship.
"""
Expand All @@ -487,11 +489,11 @@ def dtype(self) -> CategoricalDtype:
return self._dtype

@property
def _ndarray_values(self):
def _ndarray_values(self) -> np.ndarray:
return self.codes

@property
def _constructor(self):
def _constructor(self) -> Type["Categorical"]:
return Categorical

@classmethod
Expand All @@ -502,15 +504,15 @@ def _formatter(self, boxed=False):
# Defer to CategoricalFormatter's formatter.
return None

def copy(self):
def copy(self) -> "Categorical":
"""
Copy constructor.
"""
return self._constructor(
values=self._codes.copy(), dtype=self.dtype, fastpath=True
)

def astype(self, dtype, copy=True):
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
"""
Coerce this type to another dtype

Expand All @@ -523,6 +525,8 @@ def astype(self, dtype, copy=True):
object is returned.
"""
if is_categorical_dtype(dtype):
dtype = cast(Union[str, CategoricalDtype], dtype)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mypy cannot narrow the type of `dtype` from the `is_categorical_dtype(dtype)` check, so an explicit `cast` is needed here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a typing dummy, can you explain why this is needed?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add something to the contributing guide about this.


# GH 10696/18593
dtype = self.dtype.update_dtype(dtype)
self = self.copy() if copy else self
Expand All @@ -532,27 +536,27 @@ def astype(self, dtype, copy=True):
return np.array(self, dtype=dtype, copy=copy)

@cache_readonly
def ndim(self):
def ndim(self) -> int:
"""
Number of dimensions of the Categorical
"""
return self._codes.ndim

@cache_readonly
def size(self):
def size(self) -> int:
"""
return the len of myself
"""
return len(self)

@cache_readonly
def itemsize(self):
def itemsize(self) -> int:
"""
return the size of a single category
"""
return self.categories.itemsize

def tolist(self):
def tolist(self) -> list:
"""
Return a list of the values.

Expand All @@ -565,7 +569,7 @@ def tolist(self):
to_list = tolist

@property
def base(self):
def base(self) -> None:
"""
compat, we are always our own object
"""
Expand Down Expand Up @@ -712,8 +716,6 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):

return cls(codes, dtype=dtype, fastpath=True)

_codes = None

def _get_codes(self):
"""
Get the codes.
Expand Down Expand Up @@ -773,7 +775,7 @@ def _set_categories(self, categories, fastpath=False):

self._dtype = new_dtype

def _set_dtype(self, dtype):
def _set_dtype(self, dtype: CategoricalDtype) -> "Categorical":
"""
Internal method for directly updating the CategoricalDtype

Expand Down
32 changes: 19 additions & 13 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" define extension dtypes """
import re
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast
import warnings

import numpy as np
Expand All @@ -11,6 +11,8 @@

from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass

from pandas._typing import Ordered

from .base import ExtensionDtype
from .inference import is_bool, is_list_like

Expand All @@ -20,9 +22,6 @@
# CategoricalDtype constructor to detect when ordered=None is explicitly passed
ordered_sentinel = object() # type: object

# TODO(GH26403): Replace with Optional[bool] or bool
OrderedType = Union[None, bool, object]


def register_extension_dtype(cls: Type[ExtensionDtype],) -> Type[ExtensionDtype]:
"""
Expand Down Expand Up @@ -222,7 +221,11 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
_metadata = ("categories", "ordered", "_ordered_from_sentinel")
_cache = {} # type: Dict[str_type, PandasExtensionDtype]

def __init__(self, categories=None, ordered: OrderedType = ordered_sentinel):
def __init__(
self, categories=None, ordered: Union[Ordered, object] = ordered_sentinel
):
# TODO(GH26403): Set type of ordered to Ordered
ordered = cast(Ordered, ordered)
self._finalize(categories, ordered, fastpath=False)

@classmethod
Expand All @@ -235,7 +238,7 @@ def _from_fastpath(

@classmethod
def _from_categorical_dtype(
cls, dtype: "CategoricalDtype", categories=None, ordered: OrderedType = None
cls, dtype: "CategoricalDtype", categories=None, ordered: Ordered = None
) -> "CategoricalDtype":
if categories is ordered is None:
return dtype
Expand Down Expand Up @@ -336,9 +339,7 @@ def _from_values_or_dtype(

return dtype

def _finalize(
self, categories, ordered: OrderedType, fastpath: bool = False
) -> None:
def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None:

if ordered is not None and ordered is not ordered_sentinel:
self.validate_ordered(ordered)
Expand Down Expand Up @@ -423,7 +424,7 @@ def __repr__(self):
return tpl.format(data, self._ordered)

@staticmethod
def _hash_categories(categories, ordered: OrderedType = True) -> int:
def _hash_categories(categories, ordered: Ordered = True) -> int:
from pandas.core.util.hashing import (
hash_array,
_combine_hash_arrays,
Expand Down Expand Up @@ -475,7 +476,7 @@ def construct_array_type(cls):
return Categorical

@staticmethod
def validate_ordered(ordered: OrderedType) -> None:
def validate_ordered(ordered: Ordered) -> None:
"""
Validates that we have a valid ordered parameter. If
it is not a boolean, a TypeError will be raised.
Expand Down Expand Up @@ -529,7 +530,9 @@ def validate_categories(categories, fastpath: bool = False):

return categories

def update_dtype(self, dtype: "CategoricalDtype") -> "CategoricalDtype":
def update_dtype(
self, dtype: Union[str_type, "CategoricalDtype"]
) -> "CategoricalDtype":
"""
Returns a CategoricalDtype with categories and ordered taken from dtype
if specified, otherwise falling back to self if unspecified
Expand All @@ -551,6 +554,9 @@ def update_dtype(self, dtype: "CategoricalDtype") -> "CategoricalDtype":
"got {dtype!r}"
).format(dtype=dtype)
raise ValueError(msg)
else:
# from here on, dtype is a CategoricalDtype
dtype = cast(CategoricalDtype, dtype)

# dtype is CDT: keep current categories/ordered if None
new_categories = dtype.categories
Expand Down Expand Up @@ -583,7 +589,7 @@ def categories(self):
return self._categories

@property
def ordered(self) -> OrderedType:
def ordered(self) -> Ordered:
"""
Whether the categories have an ordered relationship.
"""
Expand Down