From 4b1700eac8285bd1e3948d96a0c62a6eac1d3a61 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 21 Apr 2020 19:53:18 +0100 Subject: [PATCH 1/3] TYP: remove #type: ignore for pd.array constructor --- pandas/core/arrays/categorical.py | 2 +- pandas/core/construction.py | 35 ++++++++++++++++--------------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index cdd0717849e96..0d2b42aabb58e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -478,7 +478,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: return self return self._set_dtype(dtype) if is_extension_array_dtype(dtype): - return array(self, dtype=dtype, copy=copy) # type: ignore # GH 28770 + return array(self, dtype=dtype, copy=copy) if is_integer_dtype(dtype) and self.isna().any(): raise ValueError("Cannot convert float NaN to integer") return np.array(self, dtype=dtype, copy=copy) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 2d60ad9ba50bf..f4ed9588d7f1f 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -13,7 +13,7 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime -from pandas._typing import ArrayLike, Dtype +from pandas._typing import AnyArrayLike, ArrayLike, Dtype from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, @@ -52,13 +52,14 @@ if TYPE_CHECKING: from pandas.core.series import Series # noqa: F401 from pandas.core.indexes.api import Index # noqa: F401 + from pandas.core.arrays.base import ExtensionArray def array( - data: Sequence[object], + data: Union[Sequence[object], AnyArrayLike], dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None, copy: bool = True, -) -> ABCExtensionArray: +) -> "ExtensionArray": """ Create an array. @@ -275,7 +276,7 @@ def array( ): dtype = data.dtype - data = extract_array(data, extract_numpy=True) + _data = extract_array(data, extract_numpy=True) # this returns None for not-found dtypes. if isinstance(dtype, str): @@ -283,20 +284,20 @@ def array( if is_extension_array_dtype(dtype): cls = cast(ExtensionDtype, dtype).construct_array_type() - return cls._from_sequence(data, dtype=dtype, copy=copy) + return cls._from_sequence(_data, dtype=dtype, copy=copy) if dtype is None: - inferred_dtype = lib.infer_dtype(data, skipna=True) + inferred_dtype = lib.infer_dtype(_data, skipna=True) if inferred_dtype == "period": try: - return period_array(data, copy=copy) + return period_array(_data, copy=copy) except IncompatibleFrequency: # We may have a mixture of frequencies. # We choose to return an ndarray, rather than raising. pass elif inferred_dtype == "interval": try: - return IntervalArray(data, copy=copy) + return IntervalArray(_data, copy=copy) except ValueError: # We may have a mixture of `closed` here. # We choose to return an ndarray, rather than raising. @@ -305,38 +306,38 @@ def array( elif inferred_dtype.startswith("datetime"): # datetime, datetime64 try: - return DatetimeArray._from_sequence(data, copy=copy) + return DatetimeArray._from_sequence(_data, copy=copy) except ValueError: # Mixture of timezones, fall back to PandasArray pass elif inferred_dtype.startswith("timedelta"): # timedelta, timedelta64 - return TimedeltaArray._from_sequence(data, copy=copy) + return TimedeltaArray._from_sequence(_data, copy=copy) elif inferred_dtype == "string": - return StringArray._from_sequence(data, copy=copy) + return StringArray._from_sequence(_data, copy=copy) elif inferred_dtype == "integer": - return IntegerArray._from_sequence(data, copy=copy) + return IntegerArray._from_sequence(_data, copy=copy) elif inferred_dtype == "boolean": - return BooleanArray._from_sequence(data, copy=copy) + return BooleanArray._from_sequence(_data, copy=copy) # Pandas overrides NumPy for # 1. datetime64[ns] # 2. timedelta64[ns] # so that a DatetimeArray is returned. if is_datetime64_ns_dtype(dtype): - return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) + return DatetimeArray._from_sequence(_data, dtype=dtype, copy=copy) elif is_timedelta64_ns_dtype(dtype): - return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) + return TimedeltaArray._from_sequence(_data, dtype=dtype, copy=copy) - result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) + result = PandasArray._from_sequence(_data, dtype=dtype, copy=copy) return result -def extract_array(obj, extract_numpy: bool = False): +def extract_array(obj: Any, extract_numpy: bool = False) -> Union[Any, ArrayLike]: """ Extract the ndarray or ExtensionArray from a Series or Index. From 8339d8c6309736f92d0dafeb73a7ce6c99c9a990 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 22 Apr 2020 12:04:23 +0100 Subject: [PATCH 2/3] revert narrow types from extract_array --- pandas/core/arrays/datetimelike.py | 7 +++++-- pandas/core/arrays/period.py | 10 +++++----- pandas/core/construction.py | 26 +++++++++++++------------- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 27b2ed822a49f..15047812b2cbf 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta import operator -from typing import Any, Sequence, Type, Union, cast +from typing import Any, Sequence, Type, TypeVar, Union, cast import warnings import numpy as np @@ -425,6 +425,9 @@ def _with_freq(self, freq): return self +DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") + + class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray): """ Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray @@ -704,7 +707,7 @@ def _concat_same_type(cls, to_concat, axis: int = 0): return cls._simple_new(values, dtype=dtype, freq=new_freq) - def copy(self): + def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT: values = self.asi8.copy() return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 2d9522b00627c..b7dfcd4cb188c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1,6 +1,6 @@ from datetime import timedelta import operator -from typing import Any, Callable, List, Optional, Sequence, Union +from typing import Any, Callable, List, Optional, Sequence, Type, Union import numpy as np @@ -20,6 +20,7 @@ period_asfreq_arr, ) from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds +from pandas._typing import AnyArrayLike from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -172,8 +173,8 @@ def _simple_new(cls, values: np.ndarray, freq=None, **kwargs) -> "PeriodArray": @classmethod def _from_sequence( - cls, - scalars: Sequence[Optional[Period]], + cls: Type["PeriodArray"], + scalars: Union[Sequence[Optional[Period]], AnyArrayLike], dtype: Optional[PeriodDtype] = None, copy: bool = False, ) -> "PeriodArray": @@ -186,7 +187,6 @@ def _from_sequence( validate_dtype_freq(scalars.dtype, freq) if copy: scalars = scalars.copy() - assert isinstance(scalars, PeriodArray) # for mypy return scalars periods = np.asarray(scalars, dtype=object) @@ -772,7 +772,7 @@ def raise_on_incompatible(left, right): def period_array( - data: Sequence[Optional[Period]], + data: Union[Sequence[Optional[Period]], AnyArrayLike], freq: Optional[Union[str, Tick]] = None, copy: bool = False, ) -> PeriodArray: diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f4ed9588d7f1f..7dea72b334130 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -276,7 +276,7 @@ def array( ): dtype = data.dtype - _data = extract_array(data, extract_numpy=True) + data = extract_array(data, extract_numpy=True) # this returns None for not-found dtypes. if isinstance(dtype, str): @@ -284,20 +284,20 @@ def array( if is_extension_array_dtype(dtype): cls = cast(ExtensionDtype, dtype).construct_array_type() - return cls._from_sequence(_data, dtype=dtype, copy=copy) + return cls._from_sequence(data, dtype=dtype, copy=copy) if dtype is None: - inferred_dtype = lib.infer_dtype(_data, skipna=True) + inferred_dtype = lib.infer_dtype(data, skipna=True) if inferred_dtype == "period": try: - return period_array(_data, copy=copy) + return period_array(data, copy=copy) except IncompatibleFrequency: # We may have a mixture of frequencies. # We choose to return an ndarray, rather than raising. pass elif inferred_dtype == "interval": try: - return IntervalArray(_data, copy=copy) + return IntervalArray(data, copy=copy) except ValueError: # We may have a mixture of `closed` here. # We choose to return an ndarray, rather than raising. @@ -306,34 +306,34 @@ def array( elif inferred_dtype.startswith("datetime"): # datetime, datetime64 try: - return DatetimeArray._from_sequence(_data, copy=copy) + return DatetimeArray._from_sequence(data, copy=copy) except ValueError: # Mixture of timezones, fall back to PandasArray pass elif inferred_dtype.startswith("timedelta"): # timedelta, timedelta64 - return TimedeltaArray._from_sequence(_data, copy=copy) + return TimedeltaArray._from_sequence(data, copy=copy) elif inferred_dtype == "string": - return StringArray._from_sequence(_data, copy=copy) + return StringArray._from_sequence(data, copy=copy) elif inferred_dtype == "integer": - return IntegerArray._from_sequence(_data, copy=copy) + return IntegerArray._from_sequence(data, copy=copy) elif inferred_dtype == "boolean": - return BooleanArray._from_sequence(_data, copy=copy) + return BooleanArray._from_sequence(data, copy=copy) # Pandas overrides NumPy for # 1. datetime64[ns] # 2. timedelta64[ns] # so that a DatetimeArray is returned. if is_datetime64_ns_dtype(dtype): - return DatetimeArray._from_sequence(_data, dtype=dtype, copy=copy) + return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy) elif is_timedelta64_ns_dtype(dtype): - return TimedeltaArray._from_sequence(_data, dtype=dtype, copy=copy) + return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) - result = PandasArray._from_sequence(_data, dtype=dtype, copy=copy) + result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) return result From 54ee74e5b9def34ccaf2a9189667f9a39ca9b9f4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 22 Apr 2020 12:20:31 +0100 Subject: [PATCH 3/3] revert addition of types to extract_array --- pandas/core/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 7dea72b334130..6abff7d04758b 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -337,7 +337,7 @@ def array( return result -def extract_array(obj: Any, extract_numpy: bool = False) -> Union[Any, ArrayLike]: +def extract_array(obj, extract_numpy: bool = False): """ Extract the ndarray or ExtensionArray from a Series or Index.