-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: remove #type: ignore for pd.array constructor #33706
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
4b1700e
8339d8c
54ee74e
b37bb64
acee5dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -13,7 +13,7 @@ | |||||||||||||||
|
||||||||||||||||
from pandas._libs import lib | ||||||||||||||||
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime | ||||||||||||||||
from pandas._typing import ArrayLike, Dtype | ||||||||||||||||
from pandas._typing import AnyArrayLike, ArrayLike, Dtype | ||||||||||||||||
|
||||||||||||||||
from pandas.core.dtypes.cast import ( | ||||||||||||||||
construct_1d_arraylike_from_scalar, | ||||||||||||||||
|
@@ -52,13 +52,14 @@ | |||||||||||||||
if TYPE_CHECKING: | ||||||||||||||||
from pandas.core.series import Series # noqa: F401 | ||||||||||||||||
from pandas.core.indexes.api import Index # noqa: F401 | ||||||||||||||||
from pandas.core.arrays.base import ExtensionArray | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
def array( | ||||||||||||||||
data: Sequence[object], | ||||||||||||||||
data: Union[Sequence[object], AnyArrayLike], | ||||||||||||||||
dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None, | ||||||||||||||||
copy: bool = True, | ||||||||||||||||
) -> ABCExtensionArray: | ||||||||||||||||
) -> "ExtensionArray": | ||||||||||||||||
""" | ||||||||||||||||
Create an array. | ||||||||||||||||
|
||||||||||||||||
|
@@ -275,28 +276,28 @@ def array( | |||||||||||||||
): | ||||||||||||||||
dtype = data.dtype | ||||||||||||||||
|
||||||||||||||||
data = extract_array(data, extract_numpy=True) | ||||||||||||||||
_data = extract_array(data, extract_numpy=True) | ||||||||||||||||
|
||||||||||||||||
# this returns None for not-found dtypes. | ||||||||||||||||
if isinstance(dtype, str): | ||||||||||||||||
dtype = registry.find(dtype) or dtype | ||||||||||||||||
|
||||||||||||||||
if is_extension_array_dtype(dtype): | ||||||||||||||||
cls = cast(ExtensionDtype, dtype).construct_array_type() | ||||||||||||||||
return cls._from_sequence(data, dtype=dtype, copy=copy) | ||||||||||||||||
return cls._from_sequence(_data, dtype=dtype, copy=copy) | ||||||||||||||||
|
||||||||||||||||
if dtype is None: | ||||||||||||||||
inferred_dtype = lib.infer_dtype(data, skipna=True) | ||||||||||||||||
inferred_dtype = lib.infer_dtype(_data, skipna=True) | ||||||||||||||||
if inferred_dtype == "period": | ||||||||||||||||
try: | ||||||||||||||||
return period_array(data, copy=copy) | ||||||||||||||||
return period_array(_data, copy=copy) | ||||||||||||||||
except IncompatibleFrequency: | ||||||||||||||||
# We may have a mixture of frequencies. | ||||||||||||||||
# We choose to return an ndarray, rather than raising. | ||||||||||||||||
pass | ||||||||||||||||
elif inferred_dtype == "interval": | ||||||||||||||||
try: | ||||||||||||||||
return IntervalArray(data, copy=copy) | ||||||||||||||||
return IntervalArray(_data, copy=copy) | ||||||||||||||||
except ValueError: | ||||||||||||||||
# We may have a mixture of `closed` here. | ||||||||||||||||
# We choose to return an ndarray, rather than raising. | ||||||||||||||||
|
@@ -305,38 +306,38 @@ def array( | |||||||||||||||
elif inferred_dtype.startswith("datetime"): | ||||||||||||||||
# datetime, datetime64 | ||||||||||||||||
try: | ||||||||||||||||
return DatetimeArray._from_sequence(data, copy=copy) | ||||||||||||||||
return DatetimeArray._from_sequence(_data, copy=copy) | ||||||||||||||||
except ValueError: | ||||||||||||||||
# Mixture of timezones, fall back to PandasArray | ||||||||||||||||
pass | ||||||||||||||||
|
||||||||||||||||
elif inferred_dtype.startswith("timedelta"): | ||||||||||||||||
# timedelta, timedelta64 | ||||||||||||||||
return TimedeltaArray._from_sequence(data, copy=copy) | ||||||||||||||||
return TimedeltaArray._from_sequence(_data, copy=copy) | ||||||||||||||||
|
||||||||||||||||
elif inferred_dtype == "string": | ||||||||||||||||
return StringArray._from_sequence(data, copy=copy) | ||||||||||||||||
return StringArray._from_sequence(_data, copy=copy) | ||||||||||||||||
|
||||||||||||||||
elif inferred_dtype == "integer": | ||||||||||||||||
return IntegerArray._from_sequence(data, copy=copy) | ||||||||||||||||
return IntegerArray._from_sequence(_data, copy=copy) | ||||||||||||||||
|
||||||||||||||||
elif inferred_dtype == "boolean": | ||||||||||||||||
return BooleanArray._from_sequence(data, copy=copy) | ||||||||||||||||
return BooleanArray._from_sequence(_data, copy=copy) | ||||||||||||||||
|
||||||||||||||||
# Pandas overrides NumPy for | ||||||||||||||||
# 1. datetime64[ns] | ||||||||||||||||
# 2. timedelta64[ns] | ||||||||||||||||
# so that a DatetimeArray is returned. | ||||||||||||||||
if is_datetime64_ns_dtype(dtype): | ||||||||||||||||
return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) | ||||||||||||||||
return DatetimeArray._from_sequence(_data, dtype=dtype, copy=copy) | ||||||||||||||||
elif is_timedelta64_ns_dtype(dtype): | ||||||||||||||||
return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) | ||||||||||||||||
return TimedeltaArray._from_sequence(_data, dtype=dtype, copy=copy) | ||||||||||||||||
|
||||||||||||||||
result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) | ||||||||||||||||
result = PandasArray._from_sequence(_data, dtype=dtype, copy=copy) | ||||||||||||||||
return result | ||||||||||||||||
|
||||||||||||||||
|
||||||||||||||||
def extract_array(obj, extract_numpy: bool = False): | ||||||||||||||||
def extract_array(obj: Any, extract_numpy: bool = False) -> Union[Any, ArrayLike]: | ||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. does it work to make the Any a There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. extract_array accepts any object and returns the object unchanged if it can't extract an array. pandas/pandas/core/construction.py Lines 381 to 387 in 8dd3d87
We could look into changing this (raise instead of returning the object?), but that would not change the issue of the numpy array resolving to Any. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thinking some more, I will revert this for now. |
||||||||||||||||
""" | ||||||||||||||||
Extract the ndarray or ExtensionArray from a Series or Index. | ||||||||||||||||
|
||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this needed? What is returned here should still be typed the same as the original data.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC the return type is a numpy array, and therefore data should be narrowed to just np.ndarray. This is not the same as the original data, which is typed as Union[Sequence[object], AnyArrayLike].
The crux of the issue is that np.ndarray resolves to Any.
data is typed Union[Sequence[object], AnyArrayLike], which is equivalent to Union[Sequence[object], Any, Index, Series, ExtensionArray].
np.ndarray resolving to Any prevents mypy from narrowing the type returned by extract_array, for a few reasons.
error: Overloaded function signature 2 will never be matched: signature 1's parameter type(s) are the same or broader
AnyArrayLike includes np.ndarray, which resolves to Any, and therefore the second signature is unreachable.
Even if np.ndarray did not resolve to Any, without Literal (py 3.8) we can't overload on extract_numpy to return a numpy array, i.e. to implicitly cast to the return type of extract_array: extract_array(Union[Sequence[object], AnyArrayLike], extract_numpy=True) -> np.ndarray (this requires additional overloads to the above).
Without the overloads, we have just a union return type from extract_array. We can't cast from Union[Sequence[object], AnyArrayLike] to Union[Any, ArrayLike] without a mypy error:
pandas\core\construction.py:279: error: Redundant cast to "Any"
(unless we remove warn_redundant_casts = True from setup.cfg).
So in summary, _data is a separate type to data that resolves to Any, since it is a numpy array following the extract_array call, and we revert to dynamic typing for the rest of the method.