Skip to content

TYP: remove #type: ignore for pd.array constructor #33706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
return self
return self._set_dtype(dtype)
if is_extension_array_dtype(dtype):
return array(self, dtype=dtype, copy=copy) # type: ignore # GH 28770
return array(self, dtype=dtype, copy=copy)
if is_integer_dtype(dtype) and self.isna().any():
raise ValueError("Cannot convert float NaN to integer")
return np.array(self, dtype=dtype, copy=copy)
Expand Down
35 changes: 18 additions & 17 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from pandas._libs import lib
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
from pandas._typing import ArrayLike, Dtype
from pandas._typing import AnyArrayLike, ArrayLike, Dtype

from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar,
Expand Down Expand Up @@ -52,13 +52,14 @@
if TYPE_CHECKING:
from pandas.core.series import Series # noqa: F401
from pandas.core.indexes.api import Index # noqa: F401
from pandas.core.arrays.base import ExtensionArray


def array(
data: Sequence[object],
data: Union[Sequence[object], AnyArrayLike],
dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None,
copy: bool = True,
) -> ABCExtensionArray:
) -> "ExtensionArray":
"""
Create an array.

Expand Down Expand Up @@ -275,28 +276,28 @@ def array(
):
dtype = data.dtype

data = extract_array(data, extract_numpy=True)
_data = extract_array(data, extract_numpy=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? What is returned here should still be typed the same as the original data

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is returned here should still be typed the same as the original data

IIUC the return type is a numpy array. And therefore data should be narrowed to just np.ndarray. This is not the same as the original data, which is typed as Union[Sequence[object], AnyArrayLike]

The crux of the issue is that np.ndarray resolves to Any.

data is typed Union[Sequence[object], AnyArrayLike] which is equivalent to Union[Sequence[object], Any, Index, Series, ExtensionArray].

np.ndarray resolving to Any prevents mypy being able to type narrow from extract_array for a few reasons.

  1. if we overload extract_array we get error: Overloaded function signature 2 will never be matched: signature 1's parameter type(s) are the same or broader.
@overload
def extract_array(
    obj: Union[Sequence[object], AnyArrayLike], extract_numpy: bool = False
) -> ArrayLike:
    ...


@overload
def extract_array(obj: Any, extract_numpy: bool = False) -> Any:
    ...


def extract_array(obj: Any, extract_numpy: bool = False) -> Union[Any, ArrayLike]:

AnyArrayLike includes np.ndarray which resolves to Any and therefore the second signature is unreachable.

  2. even if np.ndarray did not resolve to Any, without Literal (py 3.8) we can't overload extract_numpy to return a numpy array, i.e. to implicitly cast to the return type of extract_array, i.e. from extract_array(Union[Sequence[object], AnyArrayLike], extract_numpy=True) -> np.ndarray (this requires additional overloads to the above).

  3. without the overloads, we have just a union return type from extract_array. We can't cast from Union[Sequence[object], AnyArrayLike] to Union[Any, ArrayLike] without a mypy error: pandas\core\construction.py:279: error: Redundant cast to "Any" (unless we remove warn_redundant_casts = True from setup.cfg).

So in summary, _data has a separate type from data, one that resolves to Any since it is a numpy array following the extract_array call, and we revert to dynamic typing for the rest of the method.


# this returns None for not-found dtypes.
if isinstance(dtype, str):
dtype = registry.find(dtype) or dtype

if is_extension_array_dtype(dtype):
cls = cast(ExtensionDtype, dtype).construct_array_type()
return cls._from_sequence(data, dtype=dtype, copy=copy)
return cls._from_sequence(_data, dtype=dtype, copy=copy)

if dtype is None:
inferred_dtype = lib.infer_dtype(data, skipna=True)
inferred_dtype = lib.infer_dtype(_data, skipna=True)
if inferred_dtype == "period":
try:
return period_array(data, copy=copy)
return period_array(_data, copy=copy)
except IncompatibleFrequency:
# We may have a mixture of frequencies.
# We choose to return an ndarray, rather than raising.
pass
elif inferred_dtype == "interval":
try:
return IntervalArray(data, copy=copy)
return IntervalArray(_data, copy=copy)
except ValueError:
# We may have a mixture of `closed` here.
# We choose to return an ndarray, rather than raising.
Expand All @@ -305,38 +306,38 @@ def array(
elif inferred_dtype.startswith("datetime"):
# datetime, datetime64
try:
return DatetimeArray._from_sequence(data, copy=copy)
return DatetimeArray._from_sequence(_data, copy=copy)
except ValueError:
# Mixture of timezones, fall back to PandasArray
pass

elif inferred_dtype.startswith("timedelta"):
# timedelta, timedelta64
return TimedeltaArray._from_sequence(data, copy=copy)
return TimedeltaArray._from_sequence(_data, copy=copy)

elif inferred_dtype == "string":
return StringArray._from_sequence(data, copy=copy)
return StringArray._from_sequence(_data, copy=copy)

elif inferred_dtype == "integer":
return IntegerArray._from_sequence(data, copy=copy)
return IntegerArray._from_sequence(_data, copy=copy)

elif inferred_dtype == "boolean":
return BooleanArray._from_sequence(data, copy=copy)
return BooleanArray._from_sequence(_data, copy=copy)

# Pandas overrides NumPy for
# 1. datetime64[ns]
# 2. timedelta64[ns]
# so that a DatetimeArray is returned.
if is_datetime64_ns_dtype(dtype):
return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
return DatetimeArray._from_sequence(_data, dtype=dtype, copy=copy)
elif is_timedelta64_ns_dtype(dtype):
return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)
return TimedeltaArray._from_sequence(_data, dtype=dtype, copy=copy)

result = PandasArray._from_sequence(data, dtype=dtype, copy=copy)
result = PandasArray._from_sequence(_data, dtype=dtype, copy=copy)
return result


def extract_array(obj, extract_numpy: bool = False):
def extract_array(obj: Any, extract_numpy: bool = False) -> Union[Any, ArrayLike]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it work to make the Any a Sequence[object] ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extract_array accepts any object and returns the object unchanged if it can't extract an array.

if isinstance(obj, (ABCIndexClass, ABCSeries)):
obj = obj.array
if extract_numpy and isinstance(obj, ABCPandasArray):
obj = obj.to_numpy()
return obj

We could look into changing this (raise instead of returning the object?), but that would not change the issue of the numpy array resolving to Any.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thinking some more, will revert this for now.

"""
Extract the ndarray or ExtensionArray from a Series or Index.

Expand Down