Skip to content

TYP: _concat_same_type method of EA #37817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Optional, Sequence, TypeVar
from typing import Any, Optional, Sequence, Type, TypeVar

import numpy as np

Expand Down Expand Up @@ -170,7 +170,11 @@ def unique(self: NDArrayBackedExtensionArrayT) -> NDArrayBackedExtensionArrayT:

@classmethod
@doc(ExtensionArray._concat_same_type)
def _concat_same_type(cls, to_concat, axis: int = 0):
def _concat_same_type(
cls: Type[NDArrayBackedExtensionArrayT],
to_concat: Sequence[NDArrayBackedExtensionArrayT],
axis: int = 0,
) -> NDArrayBackedExtensionArrayT:
dtypes = {str(x.dtype) for x in to_concat}
if len(dtypes) != 1:
raise ValueError("to_concat must have the same dtype (tz)", dtypes)
Expand Down
19 changes: 15 additions & 4 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,23 @@
without warning.
"""
import operator
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union, cast
from typing import (
Any,
Callable,
Dict,
Optional,
Sequence,
Tuple,
Type,
TypeVar,
Union,
cast,
)

import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike, Shape, TypeVar
from pandas._typing import ArrayLike, Shape
from pandas.compat import set_function_name
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
Expand Down Expand Up @@ -1132,8 +1143,8 @@ def ravel(self, order="C") -> "ExtensionArray":

@classmethod
def _concat_same_type(
cls, to_concat: Sequence["ExtensionArray"]
) -> "ExtensionArray":
cls: Type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
) -> ExtensionArrayT:
"""
Concatenate multiple array of this dtype.

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from functools import partial
import operator
from shutil import get_terminal_size
from typing import Dict, Hashable, List, Type, Union, cast
from typing import Dict, Hashable, List, Sequence, Type, TypeVar, Union, cast
from warnings import warn

import numpy as np
Expand Down Expand Up @@ -56,6 +56,8 @@

from pandas.io.formats import console

CategoricalT = TypeVar("CategoricalT", bound="Categorical")


def _cat_compare_op(op):
opname = f"__{op.__name__}__"
Expand Down Expand Up @@ -2080,7 +2082,9 @@ def equals(self, other: object) -> bool:
return False

@classmethod
def _concat_same_type(self, to_concat):
def _concat_same_type(
cls: Type[CategoricalT], to_concat: Sequence[CategoricalT], axis: int = 0
) -> CategoricalT:
from pandas.core.dtypes.concat import union_categoricals

return union_categoricals(to_concat)
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,11 @@ def view(self, dtype=None):
# ExtensionArray Interface

@classmethod
def _concat_same_type(cls, to_concat, axis: int = 0):
def _concat_same_type(
cls: Type[DatetimeLikeArrayT],
to_concat: Sequence[DatetimeLikeArrayT],
axis: int = 0,
) -> DatetimeLikeArrayT:
new_obj = super()._concat_same_type(to_concat, axis)

obj = to_concat[0]
Expand Down
23 changes: 17 additions & 6 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import operator
from operator import le, lt
import textwrap
from typing import TYPE_CHECKING, Optional, Tuple, TypeVar, Union, cast
from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Type, TypeVar, Union, cast

import numpy as np

Expand Down Expand Up @@ -724,7 +724,9 @@ def equals(self, other) -> bool:
)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(
cls: Type[IntervalArrayT], to_concat: Sequence[IntervalArrayT]
) -> IntervalArrayT:
"""
Concatenate multiple IntervalArray

Expand Down Expand Up @@ -1470,10 +1472,19 @@ def _get_combined_data(
axis=1,
)
else:
left = cast(Union["DatetimeArray", "TimedeltaArray"], left)
right = cast(Union["DatetimeArray", "TimedeltaArray"], right)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The casting doesn't help?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the casts as they stand give

pandas\core\arrays\interval.py:1478: error: List item 0 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "DatetimeArray"  [list-item]
pandas\core\arrays\interval.py:1478: error: List item 0 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "TimedeltaArray"  [list-item]
pandas\core\arrays\interval.py:1478: error: List item 1 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "DatetimeArray"  [list-item]
pandas\core\arrays\interval.py:1478: error: List item 1 has incompatible type "Union[DatetimeArray, TimedeltaArray]"; expected "TimedeltaArray"  [list-item]

and would need to be changed. I can't immediately see what guarantees we have that justify the casts.

The 'ignore' is a placeholder to be fixed properly later (I guess the function signature here may be incorrect, but I have not checked). I will circle back to this after getting started on the other EA methods, as a precursor to adding type annotations to #35259.

If this is not a false positive, it needs more investigation before casting to silence the mypy errors.

combined = type(left)._concat_same_type(
[left.reshape(-1, 1), right.reshape(-1, 1)],
# error: Item "type" of "Union[Type[Index], Type[ExtensionArray]]" has
# no attribute "_concat_same_type" [union-attr]

# error: Unexpected keyword argument "axis" for "_concat_same_type" of
# "ExtensionArray" [call-arg]

# error: Item "Index" of "Union[Index, ExtensionArray]" has no
# attribute "reshape" [union-attr]

# error: Item "ExtensionArray" of "Union[Index, ExtensionArray]" has no
# attribute "reshape" [union-attr]
combined = type(left)._concat_same_type( # type: ignore[union-attr,call-arg]
[left.reshape(-1, 1), right.reshape(-1, 1)], # type: ignore[union-attr]
axis=1,
)
return combined
6 changes: 4 additions & 2 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Optional, Tuple, Type, TypeVar
from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Type, TypeVar

import numpy as np

Expand Down Expand Up @@ -261,7 +261,9 @@ def nbytes(self) -> int:
return self._data.nbytes + self._mask.nbytes

@classmethod
def _concat_same_type(cls: Type[BaseMaskedArrayT], to_concat) -> BaseMaskedArrayT:
def _concat_same_type(
cls: Type[BaseMaskedArrayT], to_concat: Sequence[BaseMaskedArrayT]
) -> BaseMaskedArrayT:
data = np.concatenate([x._data for x in to_concat])
mask = np.concatenate([x._mask for x in to_concat])
return cls(data, mask)
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import abc
import numbers
import operator
from typing import Any, Callable, Type, TypeVar, Union
from typing import Any, Callable, Sequence, Type, TypeVar, Union
import warnings

import numpy as np
Expand Down Expand Up @@ -946,7 +946,9 @@ def copy(self: SparseArrayT) -> SparseArrayT:
return self._simple_new(values, self.sp_index, self.dtype)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(
cls: Type[SparseArrayT], to_concat: Sequence[SparseArrayT]
) -> SparseArrayT:
fill_value = to_concat[0].fill_value

values = []
Expand Down