Skip to content

TYP: pandas/core/dtypes/dtypes.py #31384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

16 changes: 8 additions & 8 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,7 @@ def integer_arithmetic_method(self, other):
"""

# create the Dtype
Int8Dtype = register_extension_dtype(
Int8Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"Int8Dtype",
(_IntegerDtype,),
Expand All @@ -709,7 +709,7 @@ def integer_arithmetic_method(self, other):
)
)

Int16Dtype = register_extension_dtype(
Int16Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"Int16Dtype",
(_IntegerDtype,),
Expand All @@ -721,7 +721,7 @@ def integer_arithmetic_method(self, other):
)
)

Int32Dtype = register_extension_dtype(
Int32Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"Int32Dtype",
(_IntegerDtype,),
Expand All @@ -733,7 +733,7 @@ def integer_arithmetic_method(self, other):
)
)

Int64Dtype = register_extension_dtype(
Int64Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"Int64Dtype",
(_IntegerDtype,),
Expand All @@ -745,7 +745,7 @@ def integer_arithmetic_method(self, other):
)
)

UInt8Dtype = register_extension_dtype(
UInt8Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"UInt8Dtype",
(_IntegerDtype,),
Expand All @@ -757,7 +757,7 @@ def integer_arithmetic_method(self, other):
)
)

UInt16Dtype = register_extension_dtype(
UInt16Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"UInt16Dtype",
(_IntegerDtype,),
Expand All @@ -769,7 +769,7 @@ def integer_arithmetic_method(self, other):
)
)

UInt32Dtype = register_extension_dtype(
UInt32Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"UInt32Dtype",
(_IntegerDtype,),
Expand All @@ -781,7 +781,7 @@ def integer_arithmetic_method(self, other):
)
)

UInt64Dtype = register_extension_dtype(
UInt64Dtype: Type[_IntegerDtype] = register_extension_dtype(
type(
"UInt64Dtype",
(_IntegerDtype,),
Expand Down
131 changes: 81 additions & 50 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
""" define extension dtypes """
import re
from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
MutableMapping,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
)

import numpy as np
import pytz
Expand All @@ -13,10 +25,21 @@
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass
from pandas.core.dtypes.inference import is_bool, is_list_like

if TYPE_CHECKING:
import pyarrow # noqa: F401
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that running mypy (eg in a pre-commit hook) requires pyarrow to be installed? (which is not a required dependency?)
Are we fine with that? (do we already do that for other deps?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that running mypy (eg in a pre-commit hook) requires pyarrow to be installed?

No. mypy is a static checker; it does not execute the import.

The behavior when pyarrow is not installed depends on the strictness of the type checking (the ignore_missing_imports config option).

In general, "mypy will assume the type of that module is Any, the dynamic type. This means attempting to access any attribute of the module will automatically succeed."
https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports

from pandas.core.arrays import ( # noqa: F401
IntervalArray,
PeriodArray,
DatetimeArray,
)
from pandas import Categorical # noqa: F401

str_type = str

ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound=ExtensionDtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When do you need to use ExtensionDtypeT, and when can you use ExtensionDtype? (Both are used in this file/diff.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not using a TypeVar for find may be an oversight; I'll look into whether it is necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only call sites are in pandas\core\dtypes\common.py, so find may need TypeVars once types are added there, once the call sites of those functions are typed, and so on.

There does seem to be some resistance to TypeVars. Shall I add one now, or wait until it becomes a necessity?

I've also just noticed that the type annotation for dtype in find is incorrect as well...

the code is

            if not isinstance(dtype, type):
                dtype_type = type(dtype)
            if issubclass(dtype_type, ExtensionDtype):
                return dtype

So the dtype parameter can accept an instance as well as a type. Again, this could be updated here or in a subsequent PR. Only the return type was changed in this PR, as the existing annotation caused errors once construct_from_string was typed (it returns an instance); the return annotation on master is only a type.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The typevar has been removed for now.



def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]:
def register_extension_dtype(cls: Type[ExtensionDtypeT]) -> Type[ExtensionDtypeT]:
"""
Register an ExtensionType with pandas as class decorator.

Expand Down Expand Up @@ -65,7 +88,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None:
"""
Parameters
----------
dtype : ExtensionDtype
dtype : Type[ExtensionDtype]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put this in a non-typing way? Eg "ExtensionDtype class"

"""
if not issubclass(dtype, ExtensionDtype):
raise ValueError("can only register pandas extension dtypes")
Expand All @@ -74,7 +97,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None:

def find(
self, dtype: Union[Type[ExtensionDtype], str]
) -> Optional[Type[ExtensionDtype]]:
) -> Optional[Union[ExtensionDtype, Type[ExtensionDtype]]]:
"""
Parameters
----------
Expand Down Expand Up @@ -119,7 +142,7 @@ class PandasExtensionDtype(ExtensionDtype):
# and ExtensionDtype's @properties in the subclasses below. The kind and
# type variables in those subclasses are explicitly typed below.
subdtype = None
str: Optional[str_type] = None
str: str_type
num = 100
shape: Tuple[int, ...] = tuple()
itemsize = 8
Expand Down Expand Up @@ -481,15 +504,15 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
return np.bitwise_xor.reduce(hashed)

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["Categorical"]:
"""
Return the array type associated with this dtype.

Returns
-------
type
"""
from pandas import Categorical
from pandas import Categorical # noqa: F811

return Categorical

Expand Down Expand Up @@ -653,39 +676,39 @@ class DatetimeTZDtype(PandasExtensionDtype):
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache: Dict[str_type, PandasExtensionDtype] = {}

def __init__(self, unit="ns", tz=None):
def __init__(self, unit: Union[str_type, "DatetimeTZDtype"] = "ns", tz=None):
if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

if unit != "ns":
if isinstance(unit, str) and tz is None:
# maybe a string like datetime64[ns, tz], which we support for
# now.
result = type(self).construct_from_string(unit)
unit = result.unit
tz = result.tz
msg = (
f"Passing a dtype alias like 'datetime64[ns, {tz}]' "
"to DatetimeTZDtype is no longer supported. Use "
"'DatetimeTZDtype.construct_from_string()' instead."
)
raise ValueError(msg)
else:
raise ValueError("DatetimeTZDtype only supports ns units")
self._unit, self._tz = unit.unit, unit.tz
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity why did this need to change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This gives pandas\core\dtypes\dtypes.py:684: error: "str" has no attribute "tz", which looks weird considering the isinstance check and may be a mypy issue.

If we split the tuple assignment (which would obviously be wrong), the output may explain the message:

        if isinstance(unit, DatetimeTZDtype):
            reveal_type(unit)
            reveal_type(unit.unit)
            reveal_type(unit.tz)
            unit = unit.unit
            tz = unit.tz
            reveal_type(unit)
pandas\core\dtypes\dtypes.py:683: note: Revealed type is 'pandas.core.dtypes.dtypes.DatetimeTZDtype'
pandas\core\dtypes\dtypes.py:684: note: Revealed type is 'builtins.str'
pandas\core\dtypes\dtypes.py:685: note: Revealed type is 'Any'
pandas\core\dtypes\dtypes.py:687: error: "str" has no attribute "tz"
pandas\core\dtypes\dtypes.py:688: note: Revealed type is 'builtins.str'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that seems weird. If we just add a type ignore, does anything else need to change? Slight preference for keeping the code as is and opening an issue on mypy for the narrowing issue, if one doesn’t already exist.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reversing the assignment does not give a false positive, so we could do this instead:

          tz, unit = unit.tz, unit.unit

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or we could ignore.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reversing works as well but in any case would be helpful to raise with mypy

else:
if unit != "ns":
if isinstance(unit, str) and tz is None:
# maybe a string like datetime64[ns, tz], which we support for
# now.
result = type(self).construct_from_string(unit)
unit = result.unit
tz = result.tz
msg = (
f"Passing a dtype alias like 'datetime64[ns, {tz}]' "
"to DatetimeTZDtype is no longer supported. Use "
"'DatetimeTZDtype.construct_from_string()' instead."
)
raise ValueError(msg)
else:
raise ValueError("DatetimeTZDtype only supports ns units")

if tz:
tz = timezones.maybe_get_tz(tz)
tz = timezones.tz_standardize(tz)
elif tz is not None:
raise pytz.UnknownTimeZoneError(tz)
if tz is None:
raise TypeError("A 'tz' is required.")
if tz:
tz = timezones.maybe_get_tz(tz)
tz = timezones.tz_standardize(tz)
elif tz is not None:
raise pytz.UnknownTimeZoneError(tz)
if tz is None:
raise TypeError("A 'tz' is required.")

self._unit = unit
self._tz = tz
self._unit = unit
self._tz = tz

@property
def unit(self):
def unit(self) -> str_type:
"""
The precision of the datetime data.
"""
Expand All @@ -699,20 +722,20 @@ def tz(self):
return self._tz

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["DatetimeArray"]:
"""
Return the array type associated with this dtype.

Returns
-------
type
"""
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays import DatetimeArray # noqa: F811

return DatetimeArray

@classmethod
def construct_from_string(cls, string: str_type):
def construct_from_string(cls, string: str_type) -> "DatetimeTZDtype":
"""
Construct a DatetimeTZDtype from a string.

Expand Down Expand Up @@ -768,7 +791,7 @@ def __eq__(self, other: Any) -> bool:
and str(self.tz) == str(other.tz)
)

def __setstate__(self, state):
def __setstate__(self, state) -> None:
# for pickle compat. __get_state__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
Expand Down Expand Up @@ -864,7 +887,7 @@ def _parse_dtype_strict(cls, freq):
raise ValueError("could not construct PeriodDtype")

@classmethod
def construct_from_string(cls, string):
def construct_from_string(cls, string: str_type) -> "PeriodDtype":
"""
Strict construction from a string, raise a TypeError if not
possible
Expand Down Expand Up @@ -914,7 +937,7 @@ def __setstate__(self, state):
self._freq = state["freq"]

@classmethod
def is_dtype(cls, dtype) -> bool:
def is_dtype(cls, dtype: object) -> bool:
"""
Return a boolean if we if the passed type is an actual dtype that we
can match (via string or type)
Expand All @@ -936,7 +959,7 @@ def is_dtype(cls, dtype) -> bool:
return super().is_dtype(dtype)

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["PeriodArray"]:
"""
Return the array type associated with this dtype.

Expand All @@ -948,9 +971,13 @@ def construct_array_type(cls):

return PeriodArray

def __from_arrow__(self, array):
"""Construct PeriodArray from pyarrow Array/ChunkedArray."""
import pyarrow
def __from_arrow__(
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "PeriodArray":
"""
Construct PeriodArray from pyarrow Array/ChunkedArray.
"""
import pyarrow # noqa: F811
from pandas.core.arrays import PeriodArray
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

Expand Down Expand Up @@ -1056,7 +1083,7 @@ def subtype(self):
return self._subtype

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["IntervalArray"]:
"""
Return the array type associated with this dtype.

Expand Down Expand Up @@ -1121,7 +1148,7 @@ def __setstate__(self, state):
self._subtype = state["subtype"]

@classmethod
def is_dtype(cls, dtype) -> bool:
def is_dtype(cls, dtype: object) -> bool:
"""
Return a boolean if we if the passed type is an actual dtype that we
can match (via string or type)
Expand All @@ -1140,9 +1167,13 @@ def is_dtype(cls, dtype) -> bool:
return False
return super().is_dtype(dtype)

def __from_arrow__(self, array):
"""Construct IntervalArray from pyarrow Array/ChunkedArray."""
import pyarrow
def __from_arrow__(
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "IntervalArray":
"""
Construct IntervalArray from pyarrow Array/ChunkedArray.
"""
import pyarrow # noqa: F811
from pandas.core.arrays import IntervalArray

if isinstance(array, pyarrow.Array):
Expand Down