From 8700ee014a5832880b781c249b7094b882ac7631 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 11 Mar 2021 19:29:28 -0800 Subject: [PATCH 1/6] TYP: fix ignores --- pandas/core/groupby/groupby.py | 28 +++++++++------------------- pandas/core/groupby/ops.py | 23 ++++++++--------------- pandas/core/indexes/base.py | 9 ++++++++- pandas/core/indexes/datetimelike.py | 7 ++++++- pandas/core/reshape/reshape.py | 18 ++++++++++++------ pandas/core/sorting.py | 3 +-- pandas/core/tools/datetimes.py | 16 +++++----------- pandas/core/tools/numeric.py | 7 +++---- pandas/core/tools/timedeltas.py | 6 ++---- 9 files changed, 54 insertions(+), 63 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index be8d641169b10..9de7e4c523625 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -68,7 +68,6 @@ class providing the base-class of operations. ensure_float, is_bool_dtype, is_datetime64_dtype, - is_extension_array_dtype, is_integer_dtype, is_numeric_dtype, is_object_dtype, @@ -85,6 +84,7 @@ class providing the base-class of operations. from pandas.core.arrays import ( Categorical, DatetimeArray, + ExtensionArray, ) from pandas.core.base import ( DataError, @@ -2271,28 +2271,18 @@ def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: "'quantile' cannot be performed against 'object' dtypes!" ) - inference = None + inference: Optional[np.dtype] = None if is_integer_dtype(vals.dtype): - if is_extension_array_dtype(vals.dtype): - # error: "ndarray" has no attribute "to_numpy" - vals = vals.to_numpy( # type: ignore[attr-defined] - dtype=float, na_value=np.nan - ) - inference = np.int64 - elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype): - # error: "ndarray" has no attribute "to_numpy" - vals = vals.to_numpy( # type: ignore[attr-defined] - dtype=float, na_value=np.nan - ) + if isinstance(vals, ExtensionArray): + vals = vals.to_numpy(dtype=float, na_value=np.nan) + inference = np.dtype(np.int64) + elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): + vals = vals.to_numpy(dtype=float, na_value=np.nan) elif is_datetime64_dtype(vals.dtype): - # error: Incompatible types in assignment (expression has type - # "str", variable has type "Optional[Type[int64]]") - inference = "datetime64[ns]" # type: ignore[assignment] + inference = np.dtype("datetime64[ns]") vals = np.asarray(vals).astype(float) elif is_timedelta64_dtype(vals.dtype): - # error: Incompatible types in assignment (expression has type "str", - # variable has type "Optional[Type[signedinteger[Any]]]") - inference = "timedelta64[ns]" # type: ignore[assignment] + inference = np.dtype("timedelta64[ns]") vals = np.asarray(vals).astype(float) return vals, inference diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6495a4d26da3a..e505359987eb3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -65,6 +65,7 @@ is_timedelta64_dtype, needs_i8_conversion, ) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCCategoricalIndex from pandas.core.dtypes.missing import ( isna, @@ -522,7 +523,7 @@ def _disallow_invalid_ops(self, values: ArrayLike, how: str): @final def _ea_wrap_cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs - ) -> Tuple[np.ndarray, Optional[List[str]]]: + ) -> np.ndarray: """ If we have an ExtensionArray, unwrap, call _cython_operation, and re-wrap if appropriate. @@ -539,10 +540,7 @@ def _ea_wrap_cython_operation( ) if how in ["rank"]: # preserve float64 dtype - - # error: Incompatible return value type (got "ndarray", expected - # "Tuple[ndarray, Optional[List[str]]]") - return res_values # type: ignore[return-value] + return res_values res_values = res_values.astype("i8", copy=False) result = type(orig_values)(res_values, dtype=orig_values.dtype) @@ -555,14 +553,11 @@ def _ea_wrap_cython_operation( kind, values, how, axis, min_count, **kwargs ) dtype = maybe_cast_result_dtype(orig_values.dtype, how) - if is_extension_array_dtype(dtype): - # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no - # attribute "construct_array_type" - cls = dtype.construct_array_type() # type: ignore[union-attr] + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() return cls._from_sequence(res_values, dtype=dtype) - # error: Incompatible return value type (got "ndarray", expected - # "Tuple[ndarray, Optional[List[str]]]") - return res_values # type: ignore[return-value] + + return res_values elif is_float_dtype(values.dtype): # FloatingArray @@ -599,9 +594,7 @@ def _cython_operation( self._disallow_invalid_ops(values, how) if is_extension_array_dtype(values.dtype): - # error: Incompatible return value type (got "Tuple[ndarray, - # Optional[List[str]]]", expected "ndarray") - return self._ea_wrap_cython_operation( # type: ignore[return-value] + return self._ea_wrap_cython_operation( kind, values, how, axis, min_count, **kwargs ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b001139bef6c5..e17ca2078796f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3876,7 +3876,14 @@ def _reindex_non_unique(self, target): # -------------------------------------------------------------------- # Join Methods - def join(self, other, how="left", level=None, return_indexers=False, sort=False): + def join( + self, + other, + how: str = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, + ): """ Compute join_index and indexers to conform data structures to the new index. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 96459970a9b57..0e32e5c5d2762 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -827,7 +827,12 @@ def _union(self, other, sort): _join_precedence = 10 def join( - self, other, how: str = "left", level=None, return_indexers=False, sort=False + self, + other, + how: str = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, ): """ See Index.join diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 13119b9997002..30889068030e2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -2,6 +2,7 @@ import itertools from typing import ( + TYPE_CHECKING, List, Optional, Union, @@ -44,6 +45,9 @@ get_group_index_sorter, ) +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray + class _Unstacker: """ @@ -166,7 +170,7 @@ def _make_selectors(self): comp_index = ensure_platform_int(comp_index) stride = self.index.levshape[self.level] + self.lift - self.full_shape = ngroups, stride + self.full_shape = ngroups, int(stride) # int() for mypy selector = self.sorted_labels[-1] + stride * comp_index + self.lift # error: Argument 1 to "zeros" has incompatible type "number"; expected @@ -942,11 +946,11 @@ def _get_dummies_1d( data, prefix, prefix_sep="_", - dummy_na=False, - sparse=False, - drop_first=False, + dummy_na: bool = False, + sparse: bool = False, + drop_first: bool = False, dtype: Optional[Dtype] = None, -): +) -> DataFrame: from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling @@ -1045,7 +1049,9 @@ def get_empty_frame(data) -> DataFrame: return DataFrame(dummy_mat, index=index, columns=dummy_cols) -def _reorder_for_extension_array_stack(arr, n_rows: int, n_columns: int): +def _reorder_for_extension_array_stack( + arr: ExtensionArray, n_rows: int, n_columns: int +) -> ExtensionArray: """ Re-orders the values when stacking multiple extension-arrays. diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index ba81866602361..720643d3d98aa 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -43,7 +43,6 @@ _INT64_MAX = np.iinfo(np.int64).max -# error: Function "numpy.array" is not valid as a type def get_indexer_indexer( target: Index, level: Union[str, int, List[str], List[int]], @@ -52,7 +51,7 @@ def get_indexer_indexer( na_position: str, sort_remaining: bool, key: IndexKeyFunc, -) -> Optional[np.array]: # type: ignore[valid-type] +) -> Optional[np.ndarray]: """ Helper method that return the indexer according to input parameters for the sort_index method of DataFrame and Series. diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1e71069e5be4d..2efce5db2a4a3 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -534,25 +534,19 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index: # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): - result = arg.astype(f"datetime64[{unit}]") + res = arg.astype(f"datetime64[{unit}]") tz_parsed = None else: - result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + res, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if errors == "ignore": # Index constructor _may_ infer to DatetimeIndex - - # error: Incompatible types in assignment (expression has type "Index", variable - # has type "ExtensionArray") - result = Index(result, name=name) # type: ignore[assignment] + result = Index(res, name=name) else: - # error: Incompatible types in assignment (expression has type "DatetimeIndex", - # variable has type "ExtensionArray") - result = DatetimeIndex(result, name=name) # type: ignore[assignment] + result = DatetimeIndex(res, name=name) if not isinstance(result, DatetimeIndex): - # error: Incompatible return value type (got "ExtensionArray", expected "Index") - return result # type: ignore[return-value] + return result # GH#23758: We may still need to localize the result with tz # GH#25546: Apply tz_parsed first (from arg), then tz (from caller) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 31ab78e59a556..b7116ee95949b 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from pandas._libs import lib @@ -164,13 +166,10 @@ def to_numeric(arg, errors="raise", downcast=None): # GH33013: for IntegerArray & FloatingArray extract non-null values for casting # save mask to reconstruct the full array after casting + mask: Optional[np.ndarray] = None if isinstance(values, NumericArray): mask = values._mask values = values._data[~mask] - else: - # error: Incompatible types in assignment (expression has type "None", variable - # has type "ndarray") - mask = None # type: ignore[assignment] values_dtype = getattr(values, "dtype", None) if is_numeric_dtype(values_dtype): diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index a8378e91f9375..047cec6501627 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -165,7 +165,7 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None): arg = np.array(list(arg), dtype=object) try: - value = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] + td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] except ValueError: if errors == "ignore": return arg @@ -181,7 +181,5 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None): from pandas import TimedeltaIndex - # error: Incompatible types in assignment (expression has type "TimedeltaIndex", - # variable has type "ndarray") - value = TimedeltaIndex(value, unit="ns", name=name) # type: ignore[assignment] + value = TimedeltaIndex(td64arr, unit="ns", name=name) return value From 4cca1bb60d247048e090abbc4cc3a2735d7b3618 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 12 Mar 2021 11:01:17 +0000 Subject: [PATCH 2/6] mypy fixup --- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 2 +- pandas/core/reshape/reshape.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9de7e4c523625..c56f865796798 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2265,7 +2265,7 @@ def quantile(self, q=0.5, interpolation: str = "linear"): """ from pandas import concat - def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: + def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[np.dtype]]: if is_object_dtype(vals): raise TypeError( "'quantile' cannot be performed against 'object' dtypes!" diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e17ca2078796f..8b67b98b32f7f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3879,7 +3879,7 @@ def _reindex_non_unique(self, target): def join( self, other, - how: str = "left", + how: str_t = "left", level=None, return_indexers: bool = False, sort: bool = False, diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 30889068030e2..21e0ebc338de9 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -6,6 +6,7 @@ List, Optional, Union, + cast, ) import numpy as np @@ -1033,6 +1034,8 @@ def get_empty_frame(data) -> DataFrame: sparse_series.append(Series(data=sarr, index=index, name=col)) out = concat(sparse_series, axis=1, copy=False) + # TODO: overload concat with Literal for axis + out = cast(DataFrame, out) return out else: From 6cfc1ea715b6377feed7ae70908c61ab367d9f6b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 07:10:09 -0800 Subject: [PATCH 3/6] update annotations --- pandas/core/groupby/groupby.py | 3 ++- pandas/core/reshape/reshape.py | 2 +- pandas/core/tools/datetimes.py | 8 ++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9de7e4c523625..d749e82178e34 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -46,6 +46,7 @@ class providing the base-class of operations. ) import pandas._libs.groupby as libgroupby from pandas._typing import ( + ArrayLike, F, FrameOrSeries, FrameOrSeriesUnion, @@ -2265,7 +2266,7 @@ def quantile(self, q=0.5, interpolation: str = "linear"): """ from pandas import concat - def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: + def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: if is_object_dtype(vals): raise TypeError( "'quantile' cannot be performed against 'object' dtypes!" diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 30889068030e2..e0f09614c4597 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -170,7 +170,7 @@ def _make_selectors(self): comp_index = ensure_platform_int(comp_index) stride = self.index.levshape[self.level] + self.lift - self.full_shape = ngroups, int(stride) # int() for mypy + self.full_shape = ngroups, stride selector = self.sorted_labels[-1] + stride * comp_index + self.lift # error: Argument 1 to "zeros" has incompatible type "number"; expected diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 2efce5db2a4a3..9822356d11d7c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -534,16 +534,16 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index: # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): - res = arg.astype(f"datetime64[{unit}]") + arr = arg.astype(f"datetime64[{unit}]") tz_parsed = None else: - res, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if errors == "ignore": # Index constructor _may_ infer to DatetimeIndex - result = Index(res, name=name) + result = Index(arr, name=name) else: - result = DatetimeIndex(res, name=name) + result = DatetimeIndex(arr, name=name) if not isinstance(result, DatetimeIndex): return result From f03029b0c9708196cb336c8ec714c6ee252d9849 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 08:03:38 -0800 Subject: [PATCH 4/6] mypy fixup --- pandas/core/groupby/groupby.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d749e82178e34..ac398ef2f11a9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2275,18 +2275,22 @@ def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: inference: Optional[np.dtype] = None if is_integer_dtype(vals.dtype): if isinstance(vals, ExtensionArray): - vals = vals.to_numpy(dtype=float, na_value=np.nan) + out = vals.to_numpy(dtype=float, na_value=np.nan) + else: + out = vals inference = np.dtype(np.int64) elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): - vals = vals.to_numpy(dtype=float, na_value=np.nan) + out = vals.to_numpy(dtype=float, na_value=np.nan) elif is_datetime64_dtype(vals.dtype): inference = np.dtype("datetime64[ns]") - vals = np.asarray(vals).astype(float) + out = np.asarray(vals).astype(float) elif is_timedelta64_dtype(vals.dtype): inference = np.dtype("timedelta64[ns]") - vals = np.asarray(vals).astype(float) + out = np.asarray(vals).astype(float) + else: + out = np.asarray(out) - return vals, inference + return out, inference def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: if inference: From 926936b15ab4127a4cfd094272d14c297453603d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 08:57:32 -0800 Subject: [PATCH 5/6] typo fixup --- pandas/core/groupby/groupby.py | 2 +- pandas/core/resample.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ac398ef2f11a9..979c7aa990184 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2288,7 +2288,7 @@ def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: inference = np.dtype("timedelta64[ns]") out = np.asarray(vals).astype(float) else: - out = np.asarray(out) + out = np.asarray(vals) return out, inference diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2308f9edb4328..342c05038304d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -24,6 +24,7 @@ to_offset, ) from pandas._typing import ( + FrameOrSeries, T, TimedeltaConvertibleTypes, TimestampConvertibleTypes, @@ -1742,7 +1743,10 @@ def _get_period_bins(self, ax: PeriodIndex): return binner, bins, labels -def _take_new_index(obj, indexer, new_index, axis=0): +# TODO: type indexer as ndarray[np.intp] once dtypes can be annotated +def _take_new_index( + obj: FrameOrSeries, indexer: np.ndarray, new_index, axis: int = 0 +) -> FrameOrSeries: if isinstance(obj, ABCSeries): new_values = algos.take_nd(obj._values, indexer) From f559025a8f1ddaa82707b78402dae955da101888 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 09:56:51 -0800 Subject: [PATCH 6/6] revert accidental commit --- pandas/core/resample.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 342c05038304d..2308f9edb4328 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -24,7 +24,6 @@ to_offset, ) from pandas._typing import ( - FrameOrSeries, T, TimedeltaConvertibleTypes, TimestampConvertibleTypes, @@ -1743,10 +1742,7 @@ def _get_period_bins(self, ax: PeriodIndex): return binner, bins, labels -# TODO: type indexer as ndarray[np.intp] once dtypes can be annotated -def _take_new_index( - obj: FrameOrSeries, indexer: np.ndarray, new_index, axis: int = 0 -) -> FrameOrSeries: +def _take_new_index(obj, indexer, new_index, axis=0): if isinstance(obj, ABCSeries): new_values = algos.take_nd(obj._values, indexer)