From 3b4f1a2d6a3422795671028a7af8aebd43fbb931 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 27 Jan 2020 15:52:44 +0000 Subject: [PATCH 1/4] TYP: pandas/core/dtypes/base.py --- pandas/core/dtypes/base.py | 100 ++++++++++++++++++++++++----------- pandas/core/dtypes/dtypes.py | 2 +- 2 files changed, 70 insertions(+), 32 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 1b4e7062b38e5..821ac60f69d99 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,5 +1,15 @@ """Extend pandas with custom array types""" -from typing import Any, List, Optional, Tuple, Type +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Optional, + Pattern, + Tuple, + Type, + TypeVar, +) import numpy as np @@ -7,6 +17,11 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray # noqa: F401 + +ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype") + class ExtensionDtype: """ @@ -26,7 +41,6 @@ class ExtensionDtype: * type * name - * construct_from_string The following attributes influence the behavior of the dtype in pandas operations @@ -71,7 +85,7 @@ class property**. class ExtensionDtype: def __from_arrow__( - self, array: pyarrow.Array/ChunkedArray + self, array: Union[pyarrow.Array, pyarrow.ChunkedArray] ) -> ExtensionArray: ... @@ -82,6 +96,7 @@ def __from_arrow__( """ _metadata: Tuple[str, ...] = () + _match: Pattern def __str__(self) -> str: return self.name @@ -119,11 +134,11 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash(tuple(getattr(self, attr) for attr in self._metadata)) - def __ne__(self, other) -> bool: + def __ne__(self, other: Any) -> bool: return not self.__eq__(other) @property - def na_value(self): + def na_value(self) -> object: """ Default NA value to use for this type. 
@@ -134,12 +149,12 @@ def na_value(self): return np.nan @property - def type(self) -> Type: + def type(self) -> Type[object]: """ - The scalar type for the array, e.g. ``int`` + The type for the array, e.g. ``int`` It's expected ``ExtensionArray[item]`` returns an instance - of ``ExtensionDtype.type`` for scalar ``item``, assuming + of ``ExtensionDtype.type`` for ``item``, assuming that value is valid (not NA). NA values do not need to be instances of `type`. """ @@ -181,7 +196,7 @@ def names(self) -> Optional[List[str]]: return None @classmethod - def construct_array_type(cls): + def construct_array_type(cls) -> Type["ExtensionArray"]: """ Return the array type associated with this dtype. @@ -192,7 +207,40 @@ def construct_array_type(cls): raise NotImplementedError @classmethod - def construct_from_string(cls, string: str): + def _validate_from_string(cls, string: str) -> Dict[str, Any]: + """ + Validate string argument of cls.construct_from_string(). + + If subclass defines class attribute `_match`, returns a dictionary + containing all the named subgroups of the match keyed by the subgroup name. + Used for keyword arguments to class constructor. + + Returns + ------- + dict + """ + if not isinstance(string, str): + raise TypeError( + f"'construct_from_string' expects a string, got {type(string)}" + ) + + if hasattr(cls, "_match"): + match = cls._match.match(string) + if match: + return match.groupdict() + else: + # error: Non-overlapping equality check (left operand type: "str", right + # operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap] + assert isinstance(cls.name, str), (cls, type(cls.name)) + if string == cls.name: + return {} + + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + @classmethod + def construct_from_string( + cls: Type[ExtensionDtypeT], string: str + ) -> ExtensionDtypeT: r""" Construct this type from a string. 
@@ -201,8 +249,9 @@ def construct_from_string(cls, string: str): can be set as ``period[H]`` (where H means hourly frequency). By default, in the abstract class, just the name of the type is - expected. But subclasses can overwrite this method to accept - parameters. + expected. Subclasses can add a class attribute `_match` to with a + compiled regex to extract the required keyword arguments for the + constructor. Parameters ---------- @@ -224,28 +273,17 @@ def construct_from_string(cls, string: str): For extension dtypes with arguments the following may be an adequate implementation. - >>> @classmethod - ... def construct_from_string(cls, string): - ... pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$") - ... match = pattern.match(string) - ... if match: - ... return cls(**match.groupdict()) - ... else: - ... raise TypeError(f"Cannot construct a '{cls.__name__}' from - ... " "'{string}'") + >>> _match = re.compile(r"^my_type\[(?P<arg_name>.+)\]$") """ - if not isinstance(string, str): - raise TypeError(f"Expects a string, got {type(string).__name__}") - - # error: Non-overlapping equality check (left operand type: "str", right - # operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap] - assert isinstance(cls.name, str), (cls, type(cls.name)) - if string != cls.name: + kwargs = cls._validate_from_string(string) + try: + # error: Too many arguments for "ExtensionDtype" + return cls(**kwargs) # type: ignore + except Exception: raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") - return cls() @classmethod - def is_dtype(cls, dtype) -> bool: + def is_dtype(cls, dtype: object) -> bool: """ Check if we match 'dtype'. 
@@ -256,7 +294,7 @@ def is_dtype(cls, dtype) -> bool: Returns ------- - is_dtype : bool + bool Notes ----- diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 93522abc3a48f..7016def744a84 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -74,7 +74,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: def find( self, dtype: Union[Type[ExtensionDtype], str] - ) -> Optional[Type[ExtensionDtype]]: + ) -> Optional[Union[ExtensionDtype, Type[ExtensionDtype]]]: """ Parameters ---------- From e99b84ca803546e8fb4fd295a9d028804eb71a0d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 28 Jan 2020 09:50:54 +0000 Subject: [PATCH 2/4] revert changes to construct_from_string --- pandas/core/dtypes/base.py | 78 ++++++++++---------------------------- 1 file changed, 21 insertions(+), 57 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 821ac60f69d99..8a992a6f2211f 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -1,15 +1,5 @@ """Extend pandas with custom array types""" -from typing import ( - TYPE_CHECKING, - Any, - Dict, - List, - Optional, - Pattern, - Tuple, - Type, - TypeVar, -) +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type import numpy as np @@ -20,8 +10,6 @@ if TYPE_CHECKING: from pandas.core.arrays import ExtensionArray # noqa: F401 -ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype") - class ExtensionDtype: """ @@ -96,7 +84,6 @@ def __from_arrow__( """ _metadata: Tuple[str, ...] = () - _match: Pattern def __str__(self) -> str: return self.name @@ -207,40 +194,7 @@ def construct_array_type(cls) -> Type["ExtensionArray"]: raise NotImplementedError @classmethod - def _validate_from_string(cls, string: str) -> Dict[str, Any]: - """ - Validate string argument of cls.construct_from_string(). 
- - If subclass defines class attribute `_match`, returns a dictionary - containing all the named subgroups of the match keyed by the subgroup name. - Used for keyword arguments to class constructor. - - Returns - ------- - dict - """ - if not isinstance(string, str): - raise TypeError( - f"'construct_from_string' expects a string, got {type(string)}" - ) - - if hasattr(cls, "_match"): - match = cls._match.match(string) - if match: - return match.groupdict() - else: - # error: Non-overlapping equality check (left operand type: "str", right - # operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap] - assert isinstance(cls.name, str), (cls, type(cls.name)) - if string == cls.name: - return {} - - raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") - - @classmethod - def construct_from_string( - cls: Type[ExtensionDtypeT], string: str - ) -> ExtensionDtypeT: + def construct_from_string(cls, string: str): r""" Construct this type from a string. @@ -249,9 +203,8 @@ def construct_from_string( can be set as ``period[H]`` (where H means hourly frequency). By default, in the abstract class, just the name of the type is - expected. Subclasses can add a class attribute `_match` to with a - compiled regex to extract the required keyword arguments for the - constructor. + expected. But subclasses can overwrite this method to accept + parameters. Parameters ---------- @@ -273,14 +226,25 @@ def construct_from_string( For extension dtypes with arguments the following may be an adequate implementation. - >>> _match = re.compile(r"^my_type\[(?P<arg_name>.+)\]$") + >>> @classmethod + ... def construct_from_string(cls, string): + ... pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$") + ... match = pattern.match(string) + ... if match: + ... return cls(**match.groupdict()) + ... else: + ... raise TypeError(f"Cannot construct a '{cls.__name__}' from + ... 
" "'{string}'") """ - kwargs = cls._validate_from_string(string) - try: - # error: Too many arguments for "ExtensionDtype" - return cls(**kwargs) # type: ignore - except Exception: + if not isinstance(string, str): + raise TypeError(f"Expects a string, got {type(string).__name__}") + + # error: Non-overlapping equality check (left operand type: "str", right + # operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap] + assert isinstance(cls.name, str), (cls, type(cls.name)) + if string != cls.name: raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + return cls() @classmethod def is_dtype(cls, dtype: object) -> bool: From 1a14565dba959d0f99dd4397212e12320d3189e6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 31 Jan 2020 17:21:40 +0000 Subject: [PATCH 3/4] mypy fixup --- pandas/core/dtypes/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index ffcf61b554d86..40efefd702a9b 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -136,12 +136,12 @@ def na_value(self) -> object: return np.nan @property - def type(self) -> Type[object]: + def type(self) -> Type: """ - The type for the array, e.g. ``int`` + The scalar type for the array, e.g. ``int`` It's expected ``ExtensionArray[item]`` returns an instance - of ``ExtensionDtype.type`` for ``item``, assuming + of ``ExtensionDtype.type`` for scalar ``item``, assuming that value is valid (not NA). NA values do not need to be instances of `type`. 
""" From 89e4f56662fcbf988034e24c4efedd150659c48e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 5 Feb 2020 21:46:55 +0000 Subject: [PATCH 4/4] revert changes to pandas/core/dtypes/dtypes.py --- pandas/core/dtypes/dtypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 858fe011a3078..d00b46700981c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -74,7 +74,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None: def find( self, dtype: Union[Type[ExtensionDtype], str] - ) -> Optional[Union[ExtensionDtype, Type[ExtensionDtype]]]: + ) -> Optional[Type[ExtensionDtype]]: """ Parameters ----------