diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index be8d641169b10..979c7aa990184 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -46,6 +46,7 @@ class providing the base-class of operations. ) import pandas._libs.groupby as libgroupby from pandas._typing import ( + ArrayLike, F, FrameOrSeries, FrameOrSeriesUnion, @@ -68,7 +69,6 @@ class providing the base-class of operations. ensure_float, is_bool_dtype, is_datetime64_dtype, - is_extension_array_dtype, is_integer_dtype, is_numeric_dtype, is_object_dtype, @@ -85,6 +85,7 @@ class providing the base-class of operations. from pandas.core.arrays import ( Categorical, DatetimeArray, + ExtensionArray, ) from pandas.core.base import ( DataError, @@ -2265,37 +2266,31 @@ def quantile(self, q=0.5, interpolation: str = "linear"): """ from pandas import concat - def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: + def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]: if is_object_dtype(vals): raise TypeError( "'quantile' cannot be performed against 'object' dtypes!" ) - inference = None + inference: Optional[np.dtype] = None if is_integer_dtype(vals.dtype): - if is_extension_array_dtype(vals.dtype): - # error: "ndarray" has no attribute "to_numpy" - vals = vals.to_numpy( # type: ignore[attr-defined] - dtype=float, na_value=np.nan - ) - inference = np.int64 - elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype): - # error: "ndarray" has no attribute "to_numpy" - vals = vals.to_numpy( # type: ignore[attr-defined] - dtype=float, na_value=np.nan - ) + if isinstance(vals, ExtensionArray): + out = vals.to_numpy(dtype=float, na_value=np.nan) + else: + out = vals + inference = np.dtype(np.int64) + elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray): + out = vals.to_numpy(dtype=float, na_value=np.nan) elif is_datetime64_dtype(vals.dtype): - # error: Incompatible types in assignment (expression has type - # "str", variable has type "Optional[Type[int64]]") - inference = "datetime64[ns]" # type: ignore[assignment] - vals = np.asarray(vals).astype(float) + inference = np.dtype("datetime64[ns]") + out = np.asarray(vals).astype(float) elif is_timedelta64_dtype(vals.dtype): - # error: Incompatible types in assignment (expression has type "str", - # variable has type "Optional[Type[signedinteger[Any]]]") - inference = "timedelta64[ns]" # type: ignore[assignment] - vals = np.asarray(vals).astype(float) + inference = np.dtype("timedelta64[ns]") + out = np.asarray(vals).astype(float) + else: + out = np.asarray(vals) - return vals, inference + return out, inference def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: if inference: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6495a4d26da3a..e505359987eb3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -65,6 +65,7 @@ is_timedelta64_dtype, needs_i8_conversion, ) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCCategoricalIndex from pandas.core.dtypes.missing import ( isna, @@ -522,7 +523,7 @@ def _disallow_invalid_ops(self, values: ArrayLike, how: str): @final def _ea_wrap_cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs - ) -> Tuple[np.ndarray, Optional[List[str]]]: + ) -> np.ndarray: """ If we have an ExtensionArray, unwrap, call _cython_operation, and re-wrap if appropriate. @@ -539,10 +540,7 @@ def _ea_wrap_cython_operation( ) if how in ["rank"]: # preserve float64 dtype - - # error: Incompatible return value type (got "ndarray", expected - # "Tuple[ndarray, Optional[List[str]]]") - return res_values # type: ignore[return-value] + return res_values res_values = res_values.astype("i8", copy=False) result = type(orig_values)(res_values, dtype=orig_values.dtype) @@ -555,14 +553,11 @@ def _ea_wrap_cython_operation( kind, values, how, axis, min_count, **kwargs ) dtype = maybe_cast_result_dtype(orig_values.dtype, how) - if is_extension_array_dtype(dtype): - # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no - # attribute "construct_array_type" - cls = dtype.construct_array_type() # type: ignore[union-attr] + if isinstance(dtype, ExtensionDtype): + cls = dtype.construct_array_type() return cls._from_sequence(res_values, dtype=dtype) - # error: Incompatible return value type (got "ndarray", expected - # "Tuple[ndarray, Optional[List[str]]]") - return res_values # type: ignore[return-value] + + return res_values elif is_float_dtype(values.dtype): # FloatingArray @@ -599,9 +594,7 @@ def _cython_operation( self._disallow_invalid_ops(values, how) if is_extension_array_dtype(values.dtype): - # error: Incompatible return value type (got "Tuple[ndarray, - # Optional[List[str]]]", expected "ndarray") - return self._ea_wrap_cython_operation( # type: ignore[return-value] + return self._ea_wrap_cython_operation( kind, values, how, axis, min_count, **kwargs ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b001139bef6c5..8b67b98b32f7f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3876,7 +3876,14 @@ def _reindex_non_unique(self, target): # -------------------------------------------------------------------- # Join Methods - def join(self, other, how="left", level=None, return_indexers=False, sort=False): + def join( + self, + other, + how: str_t = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, + ): """ Compute join_index and indexers to conform data structures to the new index. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 96459970a9b57..0e32e5c5d2762 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -827,7 +827,12 @@ def _union(self, other, sort): _join_precedence = 10 def join( - self, other, how: str = "left", level=None, return_indexers=False, sort=False + self, + other, + how: str = "left", + level=None, + return_indexers: bool = False, + sort: bool = False, ): """ See Index.join diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 13119b9997002..003353856eac8 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -2,9 +2,11 @@ import itertools from typing import ( + TYPE_CHECKING, List, Optional, Union, + cast, ) import numpy as np @@ -44,6 +46,9 @@ get_group_index_sorter, ) +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray + class _Unstacker: """ @@ -942,11 +947,11 @@ def _get_dummies_1d( data, prefix, prefix_sep="_", - dummy_na=False, - sparse=False, - drop_first=False, + dummy_na: bool = False, + sparse: bool = False, + drop_first: bool = False, dtype: Optional[Dtype] = None, -): +) -> DataFrame: from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling @@ -1029,6 +1034,8 @@ def get_empty_frame(data) -> DataFrame: sparse_series.append(Series(data=sarr, index=index, name=col)) out = concat(sparse_series, axis=1, copy=False) + # TODO: overload concat with Literal for axis + out = cast(DataFrame, out) return out else: @@ -1045,7 +1052,9 @@ def get_empty_frame(data) -> DataFrame: return DataFrame(dummy_mat, index=index, columns=dummy_cols) -def _reorder_for_extension_array_stack(arr, n_rows: int, n_columns: int): +def _reorder_for_extension_array_stack( + arr: ExtensionArray, n_rows: int, n_columns: int +) -> ExtensionArray: """ Re-orders the values when stacking multiple extension-arrays. diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index ba81866602361..720643d3d98aa 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -43,7 +43,6 @@ _INT64_MAX = np.iinfo(np.int64).max -# error: Function "numpy.array" is not valid as a type def get_indexer_indexer( target: Index, level: Union[str, int, List[str], List[int]], @@ -52,7 +51,7 @@ def get_indexer_indexer( na_position: str, sort_remaining: bool, key: IndexKeyFunc, -) -> Optional[np.array]: # type: ignore[valid-type] +) -> Optional[np.ndarray]: """ Helper method that return the indexer according to input parameters for the sort_index method of DataFrame and Series. diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1e71069e5be4d..9822356d11d7c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -534,25 +534,19 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index: # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): - result = arg.astype(f"datetime64[{unit}]") + arr = arg.astype(f"datetime64[{unit}]") tz_parsed = None else: - result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if errors == "ignore": # Index constructor _may_ infer to DatetimeIndex - - # error: Incompatible types in assignment (expression has type "Index", variable - # has type "ExtensionArray") - result = Index(result, name=name) # type: ignore[assignment] + result = Index(arr, name=name) else: - # error: Incompatible types in assignment (expression has type "DatetimeIndex", - # variable has type "ExtensionArray") - result = DatetimeIndex(result, name=name) # type: ignore[assignment] + result = DatetimeIndex(arr, name=name) if not isinstance(result, DatetimeIndex): - # error: Incompatible return value type (got "ExtensionArray", expected "Index") - return result # type: ignore[return-value] + return result # GH#23758: We may still need to localize the result with tz # GH#25546: Apply tz_parsed first (from arg), then tz (from caller) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 31ab78e59a556..b7116ee95949b 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from pandas._libs import lib @@ -164,13 +166,10 @@ def to_numeric(arg, errors="raise", downcast=None): # GH33013: for IntegerArray & FloatingArray extract non-null values for casting # save mask to reconstruct the full array after casting + mask: Optional[np.ndarray] = None if isinstance(values, NumericArray): mask = values._mask values = values._data[~mask] - else: - # error: Incompatible types in assignment (expression has type "None", variable - # has type "ndarray") - mask = None # type: ignore[assignment] values_dtype = getattr(values, "dtype", None) if is_numeric_dtype(values_dtype): diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index a8378e91f9375..047cec6501627 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -165,7 +165,7 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None): arg = np.array(list(arg), dtype=object) try: - value = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] + td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] except ValueError: if errors == "ignore": return arg @@ -181,7 +181,5 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None): from pandas import TimedeltaIndex - # error: Incompatible types in assignment (expression has type "TimedeltaIndex", - # variable has type "ndarray") - value = TimedeltaIndex(value, unit="ns", name=name) # type: ignore[assignment] + value = TimedeltaIndex(td64arr, unit="ns", name=name) return value