From e964b803964d34575da3c213fe511cc6e376d632 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 28 Jan 2020 10:07:40 +0000 Subject: [PATCH 1/3] TYP: pandas/core/dtypes/dtypes.py --- pandas/core/arrays/integer.py | 16 ++--- pandas/core/dtypes/dtypes.py | 131 +++++++++++++++++++++------------- 2 files changed, 89 insertions(+), 58 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 022e6a7322872..6c8584ed42352 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -697,7 +697,7 @@ def integer_arithmetic_method(self, other): """ # create the Dtype -Int8Dtype = register_extension_dtype( +Int8Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "Int8Dtype", (_IntegerDtype,), @@ -709,7 +709,7 @@ def integer_arithmetic_method(self, other): ) ) -Int16Dtype = register_extension_dtype( +Int16Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "Int16Dtype", (_IntegerDtype,), @@ -721,7 +721,7 @@ def integer_arithmetic_method(self, other): ) ) -Int32Dtype = register_extension_dtype( +Int32Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "Int32Dtype", (_IntegerDtype,), @@ -733,7 +733,7 @@ def integer_arithmetic_method(self, other): ) ) -Int64Dtype = register_extension_dtype( +Int64Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "Int64Dtype", (_IntegerDtype,), @@ -745,7 +745,7 @@ def integer_arithmetic_method(self, other): ) ) -UInt8Dtype = register_extension_dtype( +UInt8Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "UInt8Dtype", (_IntegerDtype,), @@ -757,7 +757,7 @@ def integer_arithmetic_method(self, other): ) ) -UInt16Dtype = register_extension_dtype( +UInt16Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "UInt16Dtype", (_IntegerDtype,), @@ -769,7 +769,7 @@ def integer_arithmetic_method(self, other): ) ) -UInt32Dtype = register_extension_dtype( +UInt32Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "UInt32Dtype", 
(_IntegerDtype,), @@ -781,7 +781,7 @@ def integer_arithmetic_method(self, other): ) ) -UInt64Dtype = register_extension_dtype( +UInt64Dtype: Type[_IntegerDtype] = register_extension_dtype( type( "UInt64Dtype", (_IntegerDtype,), diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 93522abc3a48f..fa84513ad9f6d 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1,6 +1,18 @@ """ define extension dtypes """ import re -from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, + cast, +) import numpy as np import pytz @@ -13,10 +25,21 @@ from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass from pandas.core.dtypes.inference import is_bool, is_list_like +if TYPE_CHECKING: + import pyarrow # noqa: F401 + from pandas.core.arrays import ( # noqa: F401 + IntervalArray, + PeriodArray, + DatetimeArray, + ) + from pandas import Categorical # noqa: F401 + str_type = str +ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound=ExtensionDtype) + -def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: +def register_extension_dtype(cls: Type[ExtensionDtypeT]) -> Type[ExtensionDtypeT]: """ Register an ExtensionType with pandas as class decorator. 
@@ -65,7 +88,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: """ Parameters ---------- - dtype : ExtensionDtype + dtype : Type[ExtensionDtype] """ if not issubclass(dtype, ExtensionDtype): raise ValueError("can only register pandas extension dtypes") @@ -74,7 +97,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: def find( self, dtype: Union[Type[ExtensionDtype], str] - ) -> Optional[Type[ExtensionDtype]]: + ) -> Optional[Union[ExtensionDtype, Type[ExtensionDtype]]]: """ Parameters ---------- @@ -119,7 +142,7 @@ class PandasExtensionDtype(ExtensionDtype): # and ExtensionDtype's @properties in the subclasses below. The kind and # type variables in those subclasses are explicitly typed below. subdtype = None - str: Optional[str_type] = None + str: str_type num = 100 shape: Tuple[int, ...] = tuple() itemsize = 8 @@ -481,7 +504,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int: return np.bitwise_xor.reduce(hashed) @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type["Categorical"]: """ Return the array type associated with this dtype. @@ -489,7 +512,7 @@ def construct_array_type(cls): ------- type """ - from pandas import Categorical + from pandas import Categorical # noqa: F811 return Categorical @@ -653,39 +676,39 @@ class DatetimeTZDtype(PandasExtensionDtype): _match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") _cache: Dict[str_type, PandasExtensionDtype] = {} - def __init__(self, unit="ns", tz=None): + def __init__(self, unit: Union[str_type, "DatetimeTZDtype"] = "ns", tz=None): if isinstance(unit, DatetimeTZDtype): - unit, tz = unit.unit, unit.tz - - if unit != "ns": - if isinstance(unit, str) and tz is None: - # maybe a string like datetime64[ns, tz], which we support for - # now. - result = type(self).construct_from_string(unit) - unit = result.unit - tz = result.tz - msg = ( - f"Passing a dtype alias like 'datetime64[ns, {tz}]' " - "to DatetimeTZDtype is no longer supported. 
Use " - "'DatetimeTZDtype.construct_from_string()' instead." - ) - raise ValueError(msg) - else: - raise ValueError("DatetimeTZDtype only supports ns units") + self._unit, self._tz = unit.unit, unit.tz + else: + if unit != "ns": + if isinstance(unit, str) and tz is None: + # maybe a string like datetime64[ns, tz], which we support for + # now. + result = type(self).construct_from_string(unit) + unit = result.unit + tz = result.tz + msg = ( + f"Passing a dtype alias like 'datetime64[ns, {tz}]' " + "to DatetimeTZDtype is no longer supported. Use " + "'DatetimeTZDtype.construct_from_string()' instead." + ) + raise ValueError(msg) + else: + raise ValueError("DatetimeTZDtype only supports ns units") - if tz: - tz = timezones.maybe_get_tz(tz) - tz = timezones.tz_standardize(tz) - elif tz is not None: - raise pytz.UnknownTimeZoneError(tz) - if tz is None: - raise TypeError("A 'tz' is required.") + if tz: + tz = timezones.maybe_get_tz(tz) + tz = timezones.tz_standardize(tz) + elif tz is not None: + raise pytz.UnknownTimeZoneError(tz) + if tz is None: + raise TypeError("A 'tz' is required.") - self._unit = unit - self._tz = tz + self._unit = unit + self._tz = tz @property - def unit(self): + def unit(self) -> str_type: """ The precision of the datetime data. """ @@ -699,7 +722,7 @@ def tz(self): return self._tz @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type["DatetimeArray"]: """ Return the array type associated with this dtype. @@ -707,12 +730,12 @@ def construct_array_type(cls): ------- type """ - from pandas.core.arrays import DatetimeArray + from pandas.core.arrays import DatetimeArray # noqa: F811 return DatetimeArray @classmethod - def construct_from_string(cls, string: str_type): + def construct_from_string(cls, string: str_type) -> "DatetimeTZDtype": """ Construct a DatetimeTZDtype from a string. 
@@ -768,7 +791,7 @@ def __eq__(self, other: Any) -> bool: and str(self.tz) == str(other.tz) ) - def __setstate__(self, state): + def __setstate__(self, state) -> None: # for pickle compat. __get_state__ is defined in the # PandasExtensionDtype superclass and uses the public properties to # pickle -> need to set the settable private ones here (see GH26067) @@ -864,7 +887,7 @@ def _parse_dtype_strict(cls, freq): raise ValueError("could not construct PeriodDtype") @classmethod - def construct_from_string(cls, string): + def construct_from_string(cls, string: str_type) -> "PeriodDtype": """ Strict construction from a string, raise a TypeError if not possible @@ -914,7 +937,7 @@ def __setstate__(self, state): self._freq = state["freq"] @classmethod - def is_dtype(cls, dtype) -> bool: + def is_dtype(cls, dtype: object) -> bool: """ Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) @@ -936,7 +959,7 @@ def is_dtype(cls, dtype) -> bool: return super().is_dtype(dtype) @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type["PeriodArray"]: """ Return the array type associated with this dtype. @@ -948,9 +971,13 @@ def construct_array_type(cls): return PeriodArray - def __from_arrow__(self, array): - """Construct PeriodArray from pyarrow Array/ChunkedArray.""" - import pyarrow + def __from_arrow__( + self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"] + ) -> "PeriodArray": + """ + Construct PeriodArray from pyarrow Array/ChunkedArray. + """ + import pyarrow # noqa: F811 from pandas.core.arrays import PeriodArray from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask @@ -1056,7 +1083,7 @@ def subtype(self): return self._subtype @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type["IntervalArray"]: """ Return the array type associated with this dtype. 
@@ -1121,7 +1148,7 @@ def __setstate__(self, state): self._subtype = state["subtype"] @classmethod - def is_dtype(cls, dtype) -> bool: + def is_dtype(cls, dtype: object) -> bool: """ Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) @@ -1140,9 +1167,13 @@ def is_dtype(cls, dtype) -> bool: return False return super().is_dtype(dtype) - def __from_arrow__(self, array): - """Construct IntervalArray from pyarrow Array/ChunkedArray.""" - import pyarrow + def __from_arrow__( + self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"] + ) -> "IntervalArray": + """ + Construct IntervalArray from pyarrow Array/ChunkedArray. + """ + import pyarrow # noqa: F811 from pandas.core.arrays import IntervalArray if isinstance(array, pyarrow.Array): From 60120bb018008740f3e005f4f7921085285d71ee Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 5 Feb 2020 23:59:07 +0000 Subject: [PATCH 2/3] address comments --- pandas/core/dtypes/dtypes.py | 63 +++++++++++++++++------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index cc581f29a9248..06476beff252b 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -9,7 +9,6 @@ Optional, Tuple, Type, - TypeVar, Union, cast, ) @@ -36,10 +35,8 @@ str_type = str -ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound=ExtensionDtype) - -def register_extension_dtype(cls: Type[ExtensionDtypeT]) -> Type[ExtensionDtypeT]: +def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: """ Register an ExtensionType with pandas as class decorator. 
@@ -97,7 +94,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: def find( self, dtype: Union[Type[ExtensionDtype], str] - ) -> Optional[Union[ExtensionDtype, Type[ExtensionDtype]]]: + ) -> Optional[Type[ExtensionDtype]]: """ Parameters ---------- @@ -680,34 +677,34 @@ class DatetimeTZDtype(PandasExtensionDtype): def __init__(self, unit: Union[str_type, "DatetimeTZDtype"] = "ns", tz=None): if isinstance(unit, DatetimeTZDtype): - self._unit, self._tz = unit.unit, unit.tz - else: - if unit != "ns": - if isinstance(unit, str) and tz is None: - # maybe a string like datetime64[ns, tz], which we support for - # now. - result = type(self).construct_from_string(unit) - unit = result.unit - tz = result.tz - msg = ( - f"Passing a dtype alias like 'datetime64[ns, {tz}]' " - "to DatetimeTZDtype is no longer supported. Use " - "'DatetimeTZDtype.construct_from_string()' instead." - ) - raise ValueError(msg) - else: - raise ValueError("DatetimeTZDtype only supports ns units") - - if tz: - tz = timezones.maybe_get_tz(tz) - tz = timezones.tz_standardize(tz) - elif tz is not None: - raise pytz.UnknownTimeZoneError(tz) - if tz is None: - raise TypeError("A 'tz' is required.") - - self._unit = unit - self._tz = tz + unit, tz = unit.unit, unit.tz # type: ignore + + if unit != "ns": + if isinstance(unit, str) and tz is None: + # maybe a string like datetime64[ns, tz], which we support for + # now. + result = type(self).construct_from_string(unit) + unit = result.unit + tz = result.tz + msg = ( + f"Passing a dtype alias like 'datetime64[ns, {tz}]' " + "to DatetimeTZDtype is no longer supported. Use " + "'DatetimeTZDtype.construct_from_string()' instead." 
+ ) + raise ValueError(msg) + else: + raise ValueError("DatetimeTZDtype only supports ns units") + + if tz: + tz = timezones.maybe_get_tz(tz) + tz = timezones.tz_standardize(tz) + elif tz is not None: + raise pytz.UnknownTimeZoneError(tz) + if tz is None: + raise TypeError("A 'tz' is required.") + + self._unit = unit + self._tz = tz @property def unit(self) -> str_type: From f0d7827379bcb3cdca9d340389e07031f55f58df Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 7 Feb 2020 18:18:19 +0000 Subject: [PATCH 3/3] address comment --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 06476beff252b..f7eb1a28cdc97 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -85,7 +85,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: """ Parameters ---------- - dtype : Type[ExtensionDtype] + dtype : ExtensionDtype class """ if not issubclass(dtype, ExtensionDtype): raise ValueError("can only register pandas extension dtypes")