Skip to content

TYP: pandas/core/dtypes/base.py #31352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Feb 12, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 69 additions & 31 deletions pandas/core/dtypes/base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
"""Extend pandas with custom array types"""
from typing import Any, List, Optional, Tuple, Type
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Optional,
Pattern,
Tuple,
Type,
TypeVar,
)

import numpy as np

from pandas.errors import AbstractMethodError

from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries

if TYPE_CHECKING:
from pandas.core.arrays import ExtensionArray # noqa: F401

ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype")


class ExtensionDtype:
"""
Expand All @@ -26,7 +41,6 @@ class ExtensionDtype:

* type
* name
* construct_from_string

The following attributes influence the behavior of the dtype in
pandas operations
Expand Down Expand Up @@ -71,7 +85,7 @@ class property**.
class ExtensionDtype:

def __from_arrow__(
self, array: pyarrow.Array/ChunkedArray
self, array: Union[pyarrow.Array, pyarrow.ChunkedArray]
) -> ExtensionArray:
...

Expand All @@ -82,6 +96,7 @@ def __from_arrow__(
"""

_metadata: Tuple[str, ...] = ()
_match: Pattern

def __str__(self) -> str:
return self.name
Expand Down Expand Up @@ -119,11 +134,11 @@ def __eq__(self, other: Any) -> bool:
def __hash__(self) -> int:
return hash(tuple(getattr(self, attr) for attr in self._metadata))

def __ne__(self, other) -> bool:
def __ne__(self, other: Any) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did we ever decide on consistent use of Any vs object here? I thought we were using object, but maybe I missed that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this class we already have `def __eq__(self, other: Any) -> bool:`, and in #29947 Any was applied throughout.

I think it should be object in order to check the body of the __eq__/__ne__ methods, but since we want consistency, the following definitions would need to be changed as well:

__eq__

pandas/core/arrays/interval.py:    def __eq__(self, other):
pandas/core/arrays/sparse/dtype.py:    def __eq__(self, other: Any) -> bool:
pandas/core/arrays/_arrow_utils.py:        def __eq__(self, other):
pandas/core/arrays/_arrow_utils.py:        def __eq__(self, other):
pandas/core/dtypes/base.py:    def __eq__(self, other: Any) -> bool:
pandas/core/dtypes/dtypes.py:    def __eq__(self, other: Any) -> bool:
pandas/core/dtypes/dtypes.py:    def __eq__(self, other: Any) -> bool:
pandas/core/dtypes/dtypes.py:    def __eq__(self, other: Any) -> bool:
pandas/core/dtypes/dtypes.py:    def __eq__(self, other: Any) -> bool:
pandas/core/indexes/frozen.py:    def __eq__(self, other: Any) -> bool:
pandas/io/pytables.py:    def __eq__(self, other: Any) -> bool:
pandas/io/pytables.py:    def __eq__(self, other: Any) -> bool:
pandas/io/stata.py:    def __eq__(self, other: Any) -> bool:
pandas/tests/groupby/test_function.py:        def __eq__(self, other):
pandas/tests/indexing/test_indexing.py:            def __eq__(self, other) -> bool:
pandas/tests/scalar/timedelta/test_timedelta.py:            def __eq__(self, other):
pandas/tests/scalar/timestamp/test_comparisons.py:        def __eq__(self, other) -> bool:
pandas/tests/series/test_ufunc.py:        def __eq__(self, other) -> bool:
pandas/tests/test_algos.py:            def __eq__(self, other) -> bool:
pandas/tseries/offsets.py:    def __eq__(self, other: Any) -> bool:
pandas/_libs/tslibs/offsets.pyx:    def __eq__(self, other: Any) -> bool:

__ne__

pandas/core/arrays/interval.py:    def __ne__(self, other):
pandas/io/pytables.py:    def __ne__(self, other) -> bool:
pandas/tests/series/test_duplicates.py:        def __ne__(self, other):
pandas/tseries/offsets.py:    def __ne__(self, other):
pandas/_libs/tslibs/offsets.pyx:    def __ne__(self, other):

However, doing this raises issues with ExtensionArray equality (see #31646), so it should be done in a separate PR. I do think, though, that this affirms the need to use object instead of Any for more thorough checking.

return not self.__eq__(other)

@property
def na_value(self):
def na_value(self) -> object:
"""
Default NA value to use for this type.

Expand All @@ -134,12 +149,12 @@ def na_value(self):
return np.nan

@property
def type(self) -> Type:
def type(self) -> Type[object]:
"""
The scalar type for the array, e.g. ``int``
The type for the array, e.g. ``int``

It's expected ``ExtensionArray[item]`` returns an instance
of ``ExtensionDtype.type`` for scalar ``item``, assuming
of ``ExtensionDtype.type`` for ``item``, assuming
that value is valid (not NA). NA values do not need to be
instances of `type`.
"""
Expand Down Expand Up @@ -181,7 +196,7 @@ def names(self) -> Optional[List[str]]:
return None

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["ExtensionArray"]:
"""
Return the array type associated with this dtype.

Expand All @@ -192,7 +207,40 @@ def construct_array_type(cls):
raise NotImplementedError

@classmethod
def construct_from_string(cls, string: str):
def _validate_from_string(cls, string: str) -> Dict[str, Any]:
"""
Validate string argument of cls.construct_from_string().

If subclass defines class attribute `_match`, returns a dictionary
containing all the named subgroups of the match keyed by the subgroup name.
Used for keyword arguments to class constructor.

Returns
-------
dict
"""
if not isinstance(string, str):
raise TypeError(
f"'construct_from_string' expects a string, got {type(string)}"
)

if hasattr(cls, "_match"):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what uses this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some repeated logic in pandas/core/dtypes/dtypes.py can be removed with this. I will remove it for now until it is needed. (I will also be adding a test in the base extension tests for consistent error messages when the wrong type is passed to construct_from_string.)

I'll open a PR to break off changes to pandas/core/dtypes/dtypes.py without the deduplication for now.

match = cls._match.match(string)
if match:
return match.groupdict()
else:
# error: Non-overlapping equality check (left operand type: "str", right
# operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap]
assert isinstance(cls.name, str), (cls, type(cls.name))
if string == cls.name:
return {}

raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")

@classmethod
def construct_from_string(
cls: Type[ExtensionDtypeT], string: str
) -> ExtensionDtypeT:
r"""
Construct this type from a string.

Expand All @@ -201,8 +249,9 @@ def construct_from_string(cls, string: str):
can be set as ``period[H]`` (where H means hourly frequency).

By default, in the abstract class, just the name of the type is
expected. But subclasses can overwrite this method to accept
parameters.
expected. Subclasses can add a class attribute `_match` with a
compiled regex to extract the required keyword arguments for the
constructor.

Parameters
----------
Expand All @@ -224,28 +273,17 @@ def construct_from_string(cls, string: str):
For extension dtypes with arguments the following may be an
adequate implementation.

>>> @classmethod
... def construct_from_string(cls, string):
... pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
... match = pattern.match(string)
... if match:
... return cls(**match.groupdict())
... else:
... raise TypeError(f"Cannot construct a '{cls.__name__}' from
... " "'{string}'")
>>> _match = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
"""
if not isinstance(string, str):
raise TypeError(f"Expects a string, got {type(string).__name__}")

# error: Non-overlapping equality check (left operand type: "str", right
# operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap]
assert isinstance(cls.name, str), (cls, type(cls.name))
if string != cls.name:
kwargs = cls._validate_from_string(string)
try:
# error: Too many arguments for "ExtensionDtype"
return cls(**kwargs) # type: ignore
except Exception:
raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
return cls()

@classmethod
def is_dtype(cls, dtype) -> bool:
def is_dtype(cls, dtype: object) -> bool:
"""
Check if we match 'dtype'.

Expand All @@ -256,7 +294,7 @@ def is_dtype(cls, dtype) -> bool:

Returns
-------
is_dtype : bool
bool

Notes
-----
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def register(self, dtype: Type[ExtensionDtype]) -> None:

def find(
self, dtype: Union[Type[ExtensionDtype], str]
) -> Optional[Type[ExtensionDtype]]:
) -> Optional[Union[ExtensionDtype, Type[ExtensionDtype]]]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this changed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

construct_from_string on L98 returns an instance. However, since the type annotations for construct_from_string were removed following #31352 (comment), this can also be removed for now.

"""
Parameters
----------
Expand Down