
TYP: fix ignores #40389


Merged
merged 10 commits on Mar 12, 2021
41 changes: 18 additions & 23 deletions pandas/core/groupby/groupby.py
@@ -46,6 +46,7 @@ class providing the base-class of operations.
)
import pandas._libs.groupby as libgroupby
from pandas._typing import (
ArrayLike,
F,
FrameOrSeries,
FrameOrSeriesUnion,
@@ -68,7 +69,6 @@ class providing the base-class of operations.
ensure_float,
is_bool_dtype,
is_datetime64_dtype,
is_extension_array_dtype,
is_integer_dtype,
is_numeric_dtype,
is_object_dtype,
@@ -85,6 +85,7 @@ class providing the base-class of operations.
from pandas.core.arrays import (
Categorical,
DatetimeArray,
ExtensionArray,
)
from pandas.core.base import (
DataError,
@@ -2265,37 +2266,31 @@ def quantile(self, q=0.5, interpolation: str = "linear"):
"""
from pandas import concat

def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]:
if is_object_dtype(vals):
raise TypeError(
"'quantile' cannot be performed against 'object' dtypes!"
)

inference = None
inference: Optional[np.dtype] = None
Member:

the return type is Tuple[np.ndarray, Optional[Type]] in the function signature. so that can also now be narrowed.

Member:

was needed anyway for mypy fixup
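
For context, a quick illustration (not part of the diff) of why Optional[np.dtype] now fits every branch: the rewritten pre_processor always stores an np.dtype instance in inference, where the old code mixed a type object (np.int64) with plain strings.

import numpy as np

# Every inference value the new code assigns is an np.dtype instance, so the
# narrowed Optional[np.dtype] annotation holds without any ignore comments.
assert isinstance(np.dtype(np.int64), np.dtype)
assert isinstance(np.dtype("datetime64[ns]"), np.dtype)
assert isinstance(np.dtype("timedelta64[ns]"), np.dtype)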

if is_integer_dtype(vals.dtype):
if is_extension_array_dtype(vals.dtype):
# error: "ndarray" has no attribute "to_numpy"
vals = vals.to_numpy( # type: ignore[attr-defined]
dtype=float, na_value=np.nan
)
inference = np.int64
elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype):
# error: "ndarray" has no attribute "to_numpy"
vals = vals.to_numpy( # type: ignore[attr-defined]
dtype=float, na_value=np.nan
)
if isinstance(vals, ExtensionArray):
Member:

the function signature has vals: np.ndarray. also needs updating
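
As an aside, here is the reason the isinstance check removes the ignores, shown in isolation (the helper name is made up for the example): isinstance narrows the union for mypy, while is_extension_array_dtype performs the same runtime test but gives the type checker no information.

from typing import Union

import numpy as np

from pandas.api.extensions import ExtensionArray


def to_float_ndarray(vals: Union[np.ndarray, ExtensionArray]) -> np.ndarray:
    # In this branch mypy knows vals is an ExtensionArray, so .to_numpy()
    # resolves without the old "ndarray has no attribute to_numpy" ignore.
    if isinstance(vals, ExtensionArray):
        return vals.to_numpy(dtype=float, na_value=np.nan)
    return vals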

out = vals.to_numpy(dtype=float, na_value=np.nan)
else:
out = vals
inference = np.dtype(np.int64)
elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
out = vals.to_numpy(dtype=float, na_value=np.nan)
elif is_datetime64_dtype(vals.dtype):
# error: Incompatible types in assignment (expression has type
# "str", variable has type "Optional[Type[int64]]")
inference = "datetime64[ns]" # type: ignore[assignment]
vals = np.asarray(vals).astype(float)
inference = np.dtype("datetime64[ns]")
out = np.asarray(vals).astype(float)
elif is_timedelta64_dtype(vals.dtype):
# error: Incompatible types in assignment (expression has type "str",
# variable has type "Optional[Type[signedinteger[Any]]]")
inference = "timedelta64[ns]" # type: ignore[assignment]
vals = np.asarray(vals).astype(float)
inference = np.dtype("timedelta64[ns]")
out = np.asarray(vals).astype(float)
else:
out = np.asarray(vals)

return vals, inference
return out, inference

def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
if inference:
23 changes: 8 additions & 15 deletions pandas/core/groupby/ops.py
@@ -65,6 +65,7 @@
is_timedelta64_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCCategoricalIndex
from pandas.core.dtypes.missing import (
isna,
@@ -522,7 +523,7 @@ def _disallow_invalid_ops(self, values: ArrayLike, how: str):
@final
def _ea_wrap_cython_operation(
self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs
) -> Tuple[np.ndarray, Optional[List[str]]]:
) -> np.ndarray:
"""
If we have an ExtensionArray, unwrap, call _cython_operation, and
re-wrap if appropriate.
@@ -539,10 +540,7 @@ def _ea_wrap_cython_operation(
)
if how in ["rank"]:
# preserve float64 dtype

# error: Incompatible return value type (got "ndarray", expected
# "Tuple[ndarray, Optional[List[str]]]")
return res_values # type: ignore[return-value]
return res_values

res_values = res_values.astype("i8", copy=False)
result = type(orig_values)(res_values, dtype=orig_values.dtype)
@@ -555,14 +553,11 @@ def _ea_wrap_cython_operation(
kind, values, how, axis, min_count, **kwargs
)
dtype = maybe_cast_result_dtype(orig_values.dtype, how)
if is_extension_array_dtype(dtype):
# error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
# attribute "construct_array_type"
cls = dtype.construct_array_type() # type: ignore[union-attr]
if isinstance(dtype, ExtensionDtype):
cls = dtype.construct_array_type()
return cls._from_sequence(res_values, dtype=dtype)
# error: Incompatible return value type (got "ndarray", expected
# "Tuple[ndarray, Optional[List[str]]]")
return res_values # type: ignore[return-value]

return res_values

elif is_float_dtype(values.dtype):
# FloatingArray
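
The isinstance(dtype, ExtensionDtype) change above applies the same narrowing idea at the dtype level: once mypy knows the dtype is an ExtensionDtype, construct_array_type() resolves without the union-attr ignore. A minimal sketch with an illustrative function name:

from typing import Union

import numpy as np

from pandas.api.extensions import ExtensionArray, ExtensionDtype


def wrap_cython_result(
    res_values: np.ndarray, dtype: Union[np.dtype, ExtensionDtype]
) -> Union[np.ndarray, ExtensionArray]:
    # isinstance() narrows dtype to ExtensionDtype, so construct_array_type()
    # and _from_sequence() type-check without ignores.
    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        return cls._from_sequence(res_values, dtype=dtype)
    return res_values
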
@@ -599,9 +594,7 @@ def _cython_operation(
self._disallow_invalid_ops(values, how)

if is_extension_array_dtype(values.dtype):
# error: Incompatible return value type (got "Tuple[ndarray,
# Optional[List[str]]]", expected "ndarray")
return self._ea_wrap_cython_operation( # type: ignore[return-value]
return self._ea_wrap_cython_operation(
kind, values, how, axis, min_count, **kwargs
)

9 changes: 8 additions & 1 deletion pandas/core/indexes/base.py
@@ -3876,7 +3876,14 @@ def _reindex_non_unique(self, target):
# --------------------------------------------------------------------
# Join Methods

def join(self, other, how="left", level=None, return_indexers=False, sort=False):
def join(
self,
other,
how: str_t = "left",
level=None,
return_indexers: bool = False,
sort: bool = False,
):
"""
Compute join_index and indexers to conform data
structures to the new index.
7 changes: 6 additions & 1 deletion pandas/core/indexes/datetimelike.py
@@ -827,7 +827,12 @@ def _union(self, other, sort):
_join_precedence = 10

def join(
self, other, how: str = "left", level=None, return_indexers=False, sort=False
self,
other,
how: str = "left",
level=None,
return_indexers: bool = False,
sort: bool = False,
):
"""
See Index.join
19 changes: 14 additions & 5 deletions pandas/core/reshape/reshape.py
@@ -2,9 +2,11 @@

import itertools
from typing import (
TYPE_CHECKING,
List,
Optional,
Union,
cast,
)

import numpy as np
@@ -44,6 +46,9 @@
get_group_index_sorter,
)

if TYPE_CHECKING:
from pandas.core.arrays import ExtensionArray


class _Unstacker:
"""
@@ -942,11 +947,11 @@ def _get_dummies_1d(
data,
prefix,
prefix_sep="_",
dummy_na=False,
sparse=False,
drop_first=False,
dummy_na: bool = False,
sparse: bool = False,
drop_first: bool = False,
dtype: Optional[Dtype] = None,
):
) -> DataFrame:
from pandas.core.reshape.concat import concat

# Series avoids inconsistent NaN handling
@@ -1029,6 +1034,8 @@ def get_empty_frame(data) -> DataFrame:
sparse_series.append(Series(data=sarr, index=index, name=col))

out = concat(sparse_series, axis=1, copy=False)
# TODO: overload concat with Literal for axis
out = cast(DataFrame, out)
return out

else:
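
A rough sketch of what the TODO above (overloading concat with a Literal axis) might look like; these overloads are hypothetical, not part of this PR, and the real pandas.concat signature has many more parameters. With them, mypy would know that axis=1 always yields a DataFrame and the cast above would become unnecessary.

from typing import List, Literal, overload

import pandas as pd
from pandas import DataFrame, Series


@overload
def concat(objs: List[Series], axis: Literal[0] = ...) -> Series: ...
@overload
def concat(objs: List[Series], axis: Literal[1]) -> DataFrame: ...
def concat(objs, axis=0):
    # Thin stand-in for the sketch; the real implementation lives in
    # pandas.core.reshape.concat.
    return pd.concat(objs, axis=axis)
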
@@ -1045,7 +1052,9 @@ def get_empty_frame(data) -> DataFrame:
return DataFrame(dummy_mat, index=index, columns=dummy_cols)


def _reorder_for_extension_array_stack(arr, n_rows: int, n_columns: int):
def _reorder_for_extension_array_stack(
arr: ExtensionArray, n_rows: int, n_columns: int
) -> ExtensionArray:
Member:

probably should be a typevar. but also ok to leave as is for now if not needed as this is internal
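
A rough sketch of the TypeVar variant suggested here (not applied in this PR; the TypeVar name and the body shown are illustrative):

from typing import TypeVar

import numpy as np

from pandas.api.extensions import ExtensionArray

E = TypeVar("E", bound=ExtensionArray)


def _reorder_for_extension_array_stack(arr: E, n_rows: int, n_columns: int) -> E:
    # A TypeVar bound to ExtensionArray expresses that the result is the same
    # concrete subclass as the input, which a plain ExtensionArray return
    # annotation cannot.
    idx = np.arange(n_rows * n_columns).reshape(n_columns, n_rows).T.ravel()
    return arr.take(idx)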

"""
Re-orders the values when stacking multiple extension-arrays.

3 changes: 1 addition & 2 deletions pandas/core/sorting.py
@@ -43,7 +43,6 @@
_INT64_MAX = np.iinfo(np.int64).max


# error: Function "numpy.array" is not valid as a type
def get_indexer_indexer(
target: Index,
level: Union[str, int, List[str], List[int]],
@@ -52,7 +51,7 @@ def get_indexer_indexer(
na_position: str,
sort_remaining: bool,
key: IndexKeyFunc,
) -> Optional[np.array]: # type: ignore[valid-type]
) -> Optional[np.ndarray]:
Member:

you found the bonus low hanger! 😄
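
For the record, the distinction behind the removed ignore, shown in isolation (hypothetical function): np.array is a factory function rather than a class, so it is not valid as a type annotation, while np.ndarray is the actual array type.

from typing import Optional

import numpy as np


def maybe_indexer(needs_sort: bool) -> Optional[np.ndarray]:
    # Annotating with np.ndarray (the class) is valid; annotating with
    # np.array (a function) is what triggered the old "not valid as a type"
    # error.
    return np.argsort(np.array([3, 1, 2])) if needs_sort else None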

"""
Helper method that return the indexer according to input parameters for
the sort_index method of DataFrame and Series.
16 changes: 5 additions & 11 deletions pandas/core/tools/datetimes.py
@@ -534,25 +534,19 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index:
# GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
# because it expects an ndarray argument
if isinstance(arg, IntegerArray):
result = arg.astype(f"datetime64[{unit}]")
arr = arg.astype(f"datetime64[{unit}]")
tz_parsed = None
else:
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)

if errors == "ignore":
# Index constructor _may_ infer to DatetimeIndex

# error: Incompatible types in assignment (expression has type "Index", variable
# has type "ExtensionArray")
result = Index(result, name=name) # type: ignore[assignment]
result = Index(arr, name=name)
else:
# error: Incompatible types in assignment (expression has type "DatetimeIndex",
# variable has type "ExtensionArray")
result = DatetimeIndex(result, name=name) # type: ignore[assignment]
result = DatetimeIndex(arr, name=name)

if not isinstance(result, DatetimeIndex):
# error: Incompatible return value type (got "ExtensionArray", expected "Index")
return result # type: ignore[return-value]
return result

# GH#23758: We may still need to localize the result with tz
# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
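
The renaming in this hunk reflects a pattern that removes several ignores in this PR: when a value changes type, give it a new name instead of reusing one variable, so mypy can infer a single type per name. Roughly (the function and parameter names here are illustrative):

import numpy as np

from pandas import DatetimeIndex, Index


def wrap_unit_result(arr: np.ndarray, name, errors: str) -> Index:
    # arr keeps the ndarray type for its whole lifetime; the wrapped index is
    # bound to a separate name, so neither assignment needs an
    # "incompatible types in assignment" ignore.
    if errors == "ignore":
        result = Index(arr, name=name)
    else:
        result = DatetimeIndex(arr, name=name)
    return result
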
7 changes: 3 additions & 4 deletions pandas/core/tools/numeric.py
@@ -1,3 +1,5 @@
from typing import Optional

import numpy as np

from pandas._libs import lib
@@ -164,13 +166,10 @@ def to_numeric(arg, errors="raise", downcast=None):

# GH33013: for IntegerArray & FloatingArray extract non-null values for casting
# save mask to reconstruct the full array after casting
mask: Optional[np.ndarray] = None
if isinstance(values, NumericArray):
mask = values._mask
values = values._data[~mask]
else:
# error: Incompatible types in assignment (expression has type "None", variable
# has type "ndarray")
mask = None # type: ignore[assignment]

values_dtype = getattr(values, "dtype", None)
if is_numeric_dtype(values_dtype):
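
The mask fix above uses another small pattern worth naming: annotate the variable as Optional at its first assignment, so the None default and the later ndarray assignment both type-check without an ignore. In isolation (illustrative helper):

from typing import Optional, Tuple

import numpy as np


def split_values_and_mask(values) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    # Declaring the annotation up front gives mask the type
    # Optional[np.ndarray] in both branches, so no ignore is needed.
    mask: Optional[np.ndarray] = None
    data = np.asarray(values)
    if hasattr(values, "_mask"):
        mask = np.asarray(values._mask)
        data = np.asarray(values._data)[~mask]
    return data, mask
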
6 changes: 2 additions & 4 deletions pandas/core/tools/timedeltas.py
@@ -165,7 +165,7 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None):
arg = np.array(list(arg), dtype=object)

try:
value = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
except ValueError:
if errors == "ignore":
return arg
@@ -181,7 +181,5 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None):

from pandas import TimedeltaIndex

# error: Incompatible types in assignment (expression has type "TimedeltaIndex",
# variable has type "ndarray")
value = TimedeltaIndex(value, unit="ns", name=name) # type: ignore[assignment]
value = TimedeltaIndex(td64arr, unit="ns", name=name)
Member:

could also return TimedeltaIndex ... without the intermediate assignment, but the td64arr naming is also an improvement anyway.

return value