Skip to content

TYP: pandas/core/dtypes/dtypes.py #31384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

74 changes: 51 additions & 23 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@
"""

import re
from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
MutableMapping,
Optional,
Tuple,
Type,
Union,
cast,
)

import numpy as np
import pytz
Expand All @@ -16,6 +27,15 @@
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass
from pandas.core.dtypes.inference import is_bool, is_list_like

if TYPE_CHECKING:
import pyarrow # noqa: F401
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that running mypy (eg in a pre-commit hook) requires pyarrow to be installed? (which is not a required dependency?)
Are we fine with that? (do we already do that for other deps?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that running mypy (eg in a pre-commit hook) requires pyarrow to be installed?

No. mypy is a static checker, so it never executes this import.

The behavior when pyarrow is not installed depends on the strictness of the type checking (the `ignore_missing_imports` config option).

In general, "mypy will assume the type of that module is Any, the dynamic type. This means attempting to access any attribute of the module will automatically succeed":
https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports

from pandas.core.arrays import ( # noqa: F401
IntervalArray,
PeriodArray,
DatetimeArray,
)
from pandas import Categorical # noqa: F401

str_type = str


Expand Down Expand Up @@ -68,7 +88,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None:
"""
Parameters
----------
dtype : ExtensionDtype
dtype : ExtensionDtype class
"""
if not issubclass(dtype, ExtensionDtype):
raise ValueError("can only register pandas extension dtypes")
Expand Down Expand Up @@ -122,7 +142,7 @@ class PandasExtensionDtype(ExtensionDtype):
# and ExtensionDtype's @properties in the subclasses below. The kind and
# type variables in those subclasses are explicitly typed below.
subdtype = None
str: Optional[str_type] = None
str: str_type
num = 100
shape: Tuple[int, ...] = tuple()
itemsize = 8
Expand Down Expand Up @@ -500,15 +520,15 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
return np.bitwise_xor.reduce(hashed)

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["Categorical"]:
"""
Return the array type associated with this dtype.

Returns
-------
type
"""
from pandas import Categorical
from pandas import Categorical # noqa: F811

return Categorical

Expand Down Expand Up @@ -672,9 +692,9 @@ class DatetimeTZDtype(PandasExtensionDtype):
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache: Dict[str_type, PandasExtensionDtype] = {}

def __init__(self, unit="ns", tz=None):
def __init__(self, unit: Union[str_type, "DatetimeTZDtype"] = "ns", tz=None):
if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz
unit, tz = unit.unit, unit.tz # type: ignore

if unit != "ns":
if isinstance(unit, str) and tz is None:
Expand Down Expand Up @@ -704,7 +724,7 @@ def __init__(self, unit="ns", tz=None):
self._tz = tz

@property
def unit(self):
def unit(self) -> str_type:
"""
The precision of the datetime data.
"""
Expand All @@ -718,20 +738,20 @@ def tz(self):
return self._tz

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["DatetimeArray"]:
"""
Return the array type associated with this dtype.

Returns
-------
type
"""
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays import DatetimeArray # noqa: F811

return DatetimeArray

@classmethod
def construct_from_string(cls, string: str_type):
def construct_from_string(cls, string: str_type) -> "DatetimeTZDtype":
"""
Construct a DatetimeTZDtype from a string.

Expand Down Expand Up @@ -789,7 +809,7 @@ def __eq__(self, other: Any) -> bool:
and str(self.tz) == str(other.tz)
)

def __setstate__(self, state):
def __setstate__(self, state) -> None:
# for pickle compat. __get_state__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
Expand Down Expand Up @@ -884,7 +904,7 @@ def _parse_dtype_strict(cls, freq):
raise ValueError("could not construct PeriodDtype")

@classmethod
def construct_from_string(cls, string):
def construct_from_string(cls, string: str_type) -> "PeriodDtype":
"""
Strict construction from a string, raise a TypeError if not
possible
Expand Down Expand Up @@ -934,7 +954,7 @@ def __setstate__(self, state):
self._freq = state["freq"]

@classmethod
def is_dtype(cls, dtype) -> bool:
def is_dtype(cls, dtype: object) -> bool:
"""
Return a boolean if we if the passed type is an actual dtype that we
can match (via string or type)
Expand All @@ -955,7 +975,7 @@ def is_dtype(cls, dtype) -> bool:
return super().is_dtype(dtype)

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["PeriodArray"]:
"""
Return the array type associated with this dtype.

Expand All @@ -967,9 +987,13 @@ def construct_array_type(cls):

return PeriodArray

def __from_arrow__(self, array):
"""Construct PeriodArray from pyarrow Array/ChunkedArray."""
import pyarrow
def __from_arrow__(
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "PeriodArray":
"""
Construct PeriodArray from pyarrow Array/ChunkedArray.
"""
import pyarrow # noqa: F811
from pandas.core.arrays import PeriodArray
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

Expand Down Expand Up @@ -1075,7 +1099,7 @@ def subtype(self):
return self._subtype

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["IntervalArray"]:
"""
Return the array type associated with this dtype.

Expand Down Expand Up @@ -1142,7 +1166,7 @@ def __setstate__(self, state):
self._subtype = state["subtype"]

@classmethod
def is_dtype(cls, dtype) -> bool:
def is_dtype(cls, dtype: object) -> bool:
"""
Return a boolean if we if the passed type is an actual dtype that we
can match (via string or type)
Expand All @@ -1160,9 +1184,13 @@ def is_dtype(cls, dtype) -> bool:
return False
return super().is_dtype(dtype)

def __from_arrow__(self, array):
"""Construct IntervalArray from pyarrow Array/ChunkedArray."""
import pyarrow
def __from_arrow__(
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "IntervalArray":
"""
Construct IntervalArray from pyarrow Array/ChunkedArray.
"""
import pyarrow # noqa: F811
from pandas.core.arrays import IntervalArray

if isinstance(array, pyarrow.Array):
Expand Down