Skip to content

TYP: _concat_same_type method of EA #37817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Optional, Sequence, TypeVar
from typing import Any, Optional, Sequence, Type, TypeVar

import numpy as np

Expand Down Expand Up @@ -160,7 +160,7 @@ def unique(self: _T) -> _T:

@classmethod
@doc(ExtensionArray._concat_same_type)
def _concat_same_type(cls, to_concat, axis: int = 0):
def _concat_same_type(cls: Type[_T], to_concat: Sequence[_T], axis: int = 0) -> _T:
dtypes = {str(x.dtype) for x in to_concat}
if len(dtypes) != 1:
raise ValueError("to_concat must have the same dtype (tz)", dtypes)
Expand Down
19 changes: 16 additions & 3 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,18 @@
without warning.
"""
import operator
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union, cast
from typing import (
Any,
Callable,
Dict,
Optional,
Sequence,
Tuple,
Type,
TypeVar,
Union,
cast,
)

import numpy as np

Expand Down Expand Up @@ -37,6 +48,8 @@

_extension_array_shared_docs: Dict[str, str] = dict()

ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might have asked before, but can you explain again why this is needed, and the "ExtensionArray" as string version doesn't work?

(I think I mentioned this before, but IMO we should have better documentation about the specific patterns we are using in pandas in our contributing guides)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I think I mentioned this before, but IMO we should have better documentation about the specific patterns we are using in pandas in our contributing guides)

These patterns are not specific to pandas; see https://mypy.readthedocs.io/en/stable/generics.html#generic-functions and https://mypy.readthedocs.io/en/stable/generics.html#generic-methods-and-generic-self

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might have asked before, but can you explain again why this is needed, and the "ExtensionArray" as string version doesn't work?

I do recall explaining it in the past (obviously I didn't do a good job). I won't bother with a generic explanation again; the next time I come across a concrete example of why this is needed, I will cc you in.



class ExtensionArray:
"""
Expand Down Expand Up @@ -1130,8 +1143,8 @@ def ravel(self, order="C") -> "ExtensionArray":

@classmethod
def _concat_same_type(
cls, to_concat: Sequence["ExtensionArray"]
) -> "ExtensionArray":
cls: Type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
) -> ExtensionArrayT:
"""
Concatenate multiple array of this dtype.

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from functools import partial
import operator
from shutil import get_terminal_size
from typing import Dict, Hashable, List, Type, Union, cast
from typing import Dict, Hashable, List, Sequence, Type, TypeVar, Union, cast
from warnings import warn

import numpy as np
Expand Down Expand Up @@ -56,6 +56,8 @@

from pandas.io.formats import console

CategoricalT = TypeVar("CategoricalT", bound="Categorical")


def _cat_compare_op(op):
opname = f"__{op.__name__}__"
Expand Down Expand Up @@ -2080,7 +2082,9 @@ def equals(self, other: object) -> bool:
return False

@classmethod
def _concat_same_type(self, to_concat):
def _concat_same_type(
cls: Type[CategoricalT], to_concat: Sequence[CategoricalT], axis: int = 0
) -> CategoricalT:
from pandas.core.dtypes.concat import union_categoricals

return union_categoricals(to_concat)
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,11 @@ def view(self, dtype=None):
# ExtensionArray Interface

@classmethod
def _concat_same_type(cls, to_concat, axis: int = 0):
def _concat_same_type(
cls: Type[DatetimeLikeArrayT],
to_concat: Sequence[DatetimeLikeArrayT],
axis: int = 0,
) -> DatetimeLikeArrayT:
new_obj = super()._concat_same_type(to_concat, axis)

obj = to_concat[0]
Expand Down
25 changes: 19 additions & 6 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import operator
from operator import le, lt
import textwrap
from typing import TYPE_CHECKING, Optional, Tuple, Union, cast
from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Type, TypeVar, Union, cast

import numpy as np

Expand Down Expand Up @@ -56,6 +56,8 @@
from pandas import Index
from pandas.core.arrays import DatetimeArray, TimedeltaArray

IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")

_interval_shared_docs = {}

_shared_docs_kwargs = dict(
Expand Down Expand Up @@ -722,7 +724,9 @@ def equals(self, other) -> bool:
)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(
cls: Type[IntervalArrayT], to_concat: Sequence[IntervalArrayT]
) -> IntervalArrayT:
"""
Concatenate multiple IntervalArray

Expand Down Expand Up @@ -1468,10 +1472,19 @@ def _get_combined_data(
axis=1,
)
else:
left = cast(Union["DatetimeArray", "TimedeltaArray"], left)
right = cast(Union["DatetimeArray", "TimedeltaArray"], right)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The casting doesn't help?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the casts as they stand give

pandas\core\arrays\interval.py:1478: error: List item 0 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "DatetimeArray"  [list-item]
pandas\core\arrays\interval.py:1478: error: List item 0 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "TimedeltaArray"  [list-item]
pandas\core\arrays\interval.py:1478: error: List item 1 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "DatetimeArray"  [list-item]
pandas\core\arrays\interval.py:1478: error: List item 1 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "TimedeltaArray"  [list-item]

and would need to be changed. I can't immediately see what guarantees we have that justify the casts.

The 'ignore' is a stopgap to be fixed properly later. (I guess the function signature here may be incorrect, but I have not checked.) I will circle back to this after getting started on the other EA methods, as a precursor to adding type annotations to #35259.

If this is not a false positive, it needs more investigation before casting to silence the mypy errors.

combined = type(left)._concat_same_type(
[left.reshape(-1, 1), right.reshape(-1, 1)],
# error: Item "type" of "Union[Type[Index], Type[ExtensionArray]]" has
# no attribute "_concat_same_type" [union-attr]

# error: Unexpected keyword argument "axis" for "_concat_same_type" of
# "ExtensionArray" [call-arg]

# error: Item "Index" of "Union[Index, ExtensionArray]" has no
# attribute "reshape" [union-attr]

# error: Item "ExtensionArray" of "Union[Index, ExtensionArray]" has no
# attribute "reshape" [union-attr]
combined = type(left)._concat_same_type( # type: ignore[union-attr,call-arg]
[left.reshape(-1, 1), right.reshape(-1, 1)], # type: ignore[union-attr]
axis=1,
)
return combined
6 changes: 4 additions & 2 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Optional, Tuple, Type, TypeVar
from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Type, TypeVar

import numpy as np

Expand Down Expand Up @@ -261,7 +261,9 @@ def nbytes(self) -> int:
return self._data.nbytes + self._mask.nbytes

@classmethod
def _concat_same_type(cls: Type[BaseMaskedArrayT], to_concat) -> BaseMaskedArrayT:
def _concat_same_type(
cls: Type[BaseMaskedArrayT], to_concat: Sequence[BaseMaskedArrayT]
) -> BaseMaskedArrayT:
data = np.concatenate([x._data for x in to_concat])
mask = np.concatenate([x._mask for x in to_concat])
return cls(data, mask)
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import abc
import numbers
import operator
from typing import Any, Callable, Union
from typing import Any, Callable, Sequence, Type, TypeVar, Union
import warnings

import numpy as np
Expand Down Expand Up @@ -56,6 +56,7 @@
# ----------------------------------------------------------------------------
# Array

SparseArrayT = TypeVar("SparseArrayT", bound="SparseArray")

_sparray_doc_kwargs = dict(klass="SparseArray")

Expand Down Expand Up @@ -942,7 +943,9 @@ def copy(self):
return self._simple_new(values, self.sp_index, self.dtype)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(
cls: Type[SparseArrayT], to_concat: Sequence[SparseArrayT]
) -> SparseArrayT:
fill_value = to_concat[0].fill_value

values = []
Expand Down