pandas-dev · arw2019 · Dec 5, 2020 · Dec 7, 2020 · Dec 7, 2020 · Dec 7, 2020
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -33,7 +33,14 @@
     ints_to_pytimedelta,
 )
 from pandas._libs.tslibs.timezones import tz_compare
-from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar
+from pandas._typing import (
+    AnyArrayLike,
+    ArrayLike,
+    Dtype,
+    DtypeObj,
+    PandasScalar,
+    Scalar,
+)
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.common import (
@@ -834,8 +841,8 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:
 
 
 def infer_dtype_from_array(
-    arr, pandas_dtype: bool = False
-) -> Tuple[DtypeObj, ArrayLike]:
+    arr: "Union[ArrayLike, Series, PandasScalar]", pandas_dtype: bool = False
+) -> "Tuple[DtypeObj, Union[ArrayLike, Series]]":
     """
     Infer the dtype from an array.
 
@@ -883,7 +890,7 @@ def infer_dtype_from_array(
     # don't force numpy coerce with nan's
     inferred = lib.infer_dtype(arr, skipna=False)
     if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
-        return (np.dtype(np.object_), arr)
+        return np.dtype(np.object_), arr
 
     arr = np.asarray(arr)
     return arr.dtype, arr

diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -2,12 +2,12 @@
 Routines for filling missing data.
 """
 from functools import partial
-from typing import TYPE_CHECKING, Any, List, Optional, Set, Union
+from typing import TYPE_CHECKING, Any, Callable, List, Optional, Set, Tuple, Union
 
 import numpy as np
 
 from pandas._libs import algos, lib
-from pandas._typing import ArrayLike, Axis, DtypeObj
+from pandas._typing import ArrayLike, DtypeObj, IndexLabel, Scalar
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.core.dtypes.cast import infer_dtype_from_array
@@ -20,10 +20,12 @@
 from pandas.core.dtypes.missing import isna
 
 if TYPE_CHECKING:
-    from pandas import Index
+    from pandas import Index, Series
 
 
-def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
+def mask_missing(
+    arr: ArrayLike, values_to_mask: "Union[ArrayLike, Scalar, Series]"
+) -> np.ndarray:
     """
     Return a masking array of same size/shape as arr
     with entries equaling any member of values_to_mask set to True
@@ -61,7 +63,9 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
     return mask
 
 
-def clean_fill_method(method, allow_nearest: bool = False):
+def clean_fill_method(
+    method: Optional[str], allow_nearest: bool = False
+) -> Optional[str]:
     # asfreq is compat for resampling
     if method in [None, "asfreq"]:
         return None
@@ -120,7 +124,7 @@ def clean_interp_method(method: str, **kwargs) -> str:
     return method
 
 
-def find_valid_index(values, how: str):
+def find_valid_index(values: ArrayLike, how: str) -> Optional[int]:
     """
     Retrieves the index of the first valid value.
 
@@ -160,15 +164,15 @@ def find_valid_index(values, how: str):
 def interpolate_1d(
     xvalues: "Index",
     yvalues: np.ndarray,
-    method: Optional[str] = "linear",
+    method: str = "linear",
     limit: Optional[int] = None,
     limit_direction: str = "forward",
     limit_area: Optional[str] = None,
     fill_value: Optional[Any] = None,
     bounds_error: bool = False,
     order: Optional[int] = None,
     **kwargs,
-):
+) -> np.ndarray:
     """
     Logic for the 1-d interpolation.  The result should be 1-d, inputs
     xvalues and yvalues will each be 1-d arrays of the same length.
@@ -218,8 +222,13 @@ def interpolate_1d(
 
     # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
     all_nans = set(np.flatnonzero(invalid))
-    start_nans = set(range(find_valid_index(yvalues, "first")))
-    end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid)))
+
+    start_nan_idx = find_valid_index(yvalues, "first")
+    start_nans = set() if start_nan_idx is None else set(range(start_nan_idx))
+
+    end_nan_idx = find_valid_index(yvalues, "last")
+    end_nans = set() if end_nan_idx is None else set(range(1 + end_nan_idx, len(valid)))
+
     mid_nans = all_nans - start_nans - end_nans
 
     # Like the sets above, preserve_nans contains indices of invalid values,
@@ -292,8 +301,15 @@ def interpolate_1d(
 
 
 def _interpolate_scipy_wrapper(
-    x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs
-):
+    x: np.ndarray,
+    y: np.ndarray,
+    new_x: Union[Scalar, np.ndarray],
+    method: str,
+    fill_value: Optional[Scalar] = None,
+    bounds_error: bool = False,
+    order: Optional[int] = None,
+    **kwargs,
+) -> np.ndarray:
     """
     Passed off to scipy.interpolate.interp1d. method is scipy's kind.
     Returns an array interpolated at new_x.  Add any new methods to
@@ -333,15 +349,14 @@ def _interpolate_scipy_wrapper(
         "polynomial",
     ]
     if method in interp1d_methods:
-        if method == "polynomial":
-            method = order
+        kind = order if method == "polynomial" else method
         terp = interpolate.interp1d(
-            x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
+            x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error
         )
         new_y = terp(new_x)
     elif method == "spline":
         # GH #10633, #24014
-        if isna(order) or (order <= 0):
+        if order is None or isna(order) or order <= 0:
             raise ValueError(
                 f"order needs to be specified and greater than 0; got order: {order}"
             )
@@ -356,12 +371,23 @@ def _interpolate_scipy_wrapper(
             y = y.copy()
         if not new_x.flags.writeable:
             new_x = new_x.copy()
-        method = alt_methods[method]
-        new_y = method(x, y, new_x, **kwargs)
+
+        if isinstance(method, str):
+            alt_method = alt_methods[method]
+            new_y = alt_method(x, y, new_x, **kwargs)
+        else:
+            raise ValueError(f"{method} is not a valid interp method")
     return new_y
 
 
-def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
+def _from_derivatives(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: Union[Scalar, ArrayLike],
+    order: Optional[Union[int, List[int]]] = None,
+    der: Union[int, List[int]] = 0,
+    extrapolate: bool = False,
+) -> np.ndarray:
     """
     Convenience function for interpolate.BPoly.from_derivatives.
 
@@ -374,15 +400,16 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
         sorted 1D array of x-coordinates
     yi : array_like or list of array-likes
         yi[i][j] is the j-th derivative known at xi[i]
-    order: None or int or array_like of ints. Default: None.
+    x : scalar or array_like
+    order : None or int or array_like of ints, default None
         Specifies the degree of local polynomials. If not None, some
         derivatives are ignored.
-    der : int or list
+    der : int or list, default 0
         How many derivatives to extract; None for all potentially nonzero
         derivatives (that is a number equal to the number of points), or a
         list of derivatives to extract. This number includes the function
         value as 0th derivative.
-     extrapolate : bool, optional
+    extrapolate : bool, default False
         Whether to extrapolate to ouf-of-bounds points based on first and last
         intervals, or to return NaNs. Default: True.
 
@@ -404,7 +431,13 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
     return m(x)
 
 
-def _akima_interpolate(xi, yi, x, der=0, axis=0):
+def _akima_interpolate(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: Union[Scalar, ArrayLike],
+    der: int = 0,
+    axis: int = 0,
+) -> Union[Scalar, ArrayLike]:
     """
     Convenience function for akima interpolation.
     xi and yi are arrays of values used to approximate some function f,
@@ -414,9 +447,9 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
 
     Parameters
     ----------
-    xi : array_like
+    xi : np.ndarray
         A sorted list of x-coordinates, of length N.
-    yi : array_like
+    yi : np.ndarray
         A 1-D array of real values.  `yi`'s length along the interpolation
         axis must be equal to the length of `xi`. If N-D array, use axis
         parameter to select correct axis.
@@ -447,7 +480,14 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
     return P(x, nu=der)
 
 
-def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None):
+def _cubicspline_interpolate(
+    xi: np.ndarray,
+    yi: np.ndarray,
+    x: Union[ArrayLike, Scalar],
+    axis: int = 0,
+    bc_type: Union[str, Tuple] = "not-a-knot",
+    extrapolate: Optional[Union[bool, str]] = None,
+) -> Union[ArrayLike, Scalar]:
     """
     Convenience function for cubic spline data interpolator.
 
@@ -555,6 +595,8 @@ def _interpolate_with_limit_area(
         first = find_valid_index(values, "first")
         last = find_valid_index(values, "last")
 
+        assert first is not None and last is not None
+
         values = interpolate_2d(
             values,
             method=method,
@@ -572,12 +614,12 @@ def _interpolate_with_limit_area(
 
 
 def interpolate_2d(
-    values,
+    values: np.ndarray,
     method: str = "pad",
-    axis: Axis = 0,
+    axis: int = 0,
     limit: Optional[int] = None,
     limit_area: Optional[str] = None,
-):
+) -> np.ndarray:
     """
     Perform an actual interpolation of values, values will be make 2-d if
     needed fills inplace, returns the result.
@@ -623,7 +665,10 @@ def interpolate_2d(
             raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
         values = values.reshape(tuple((1,) + values.shape))
 
-    method = clean_fill_method(method)
+    method_cleaned = clean_fill_method(method)
+    assert isinstance(method_cleaned, str)
+    method = method_cleaned
+
     tvalues = transf(values)
     if method == "pad":
         result = _pad_2d(tvalues, limit=limit)
@@ -642,7 +687,9 @@ def interpolate_2d(
     return result
 
 
-def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool):
+def _cast_values_for_fillna(
+    values: ArrayLike, dtype: DtypeObj, has_mask: bool
+) -> ArrayLike:
     """
     Cast values to a dtype that algos.pad and algos.backfill can handle.
     """
@@ -661,34 +708,41 @@ def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool):
     return values
 
 
-def _fillna_prep(values, mask=None):
+def _fillna_prep(
+    values: np.ndarray, mask: Optional[np.ndarray] = None
+) -> Tuple[np.ndarray, np.ndarray]:
     # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
-    dtype = values.dtype
 
     has_mask = mask is not None
-    if not has_mask:
-        # This needs to occur before datetime/timedeltas are cast to int64
-        mask = isna(values)
 
-    values = _cast_values_for_fillna(values, dtype, has_mask)
+    # This needs to occur before datetime/timedeltas are cast to int64
+    mask = isna(values) if mask is None else mask
 
+    values = _cast_values_for_fillna(values, values.dtype, has_mask)
     mask = mask.view(np.uint8)
+
     return values, mask
 
 
-def _pad_1d(values, limit=None, mask=None):
+def _pad_1d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
     algos.pad_inplace(values, mask, limit=limit)
     return values
 
 
-def _backfill_1d(values, limit=None, mask=None):
+def _backfill_1d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
     algos.backfill_inplace(values, mask, limit=limit)
     return values
 
 
-def _pad_2d(values, limit=None, mask=None):
+def _pad_2d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
 
     if np.all(values.shape):
@@ -699,7 +753,9 @@ def _pad_2d(values, limit=None, mask=None):
     return values
 
 
-def _backfill_2d(values, limit=None, mask=None):
+def _backfill_2d(
+    values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None
+) -> np.ndarray:
     values, mask = _fillna_prep(values, mask)
 
     if np.all(values.shape):
@@ -713,16 +769,19 @@ def _backfill_2d(values, limit=None, mask=None):
 _fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
 
 
-def get_fill_func(method):
-    method = clean_fill_method(method)
-    return _fill_methods[method]
+def get_fill_func(method: str) -> Callable:
+    method_cleaned = clean_fill_method(method)
+    assert isinstance(method_cleaned, str)
+    return _fill_methods[method_cleaned]
 
 
-def clean_reindex_fill_method(method):
+def clean_reindex_fill_method(method: str) -> Optional[str]:
     return clean_fill_method(method, allow_nearest=True)
 
 
-def _interp_limit(invalid, fw_limit, bw_limit):
+def _interp_limit(
+    invalid: np.ndarray, fw_limit: Optional[int], bw_limit: Optional[int]
+) -> Set[IndexLabel]:
     """
     Get indexers of values that won't be filled
     because they exceed the limits.
@@ -757,7 +816,7 @@ def _interp_limit(invalid, fw_limit, bw_limit):
     f_idx = set()
     b_idx = set()
 
-    def inner(invalid, limit):
+    def inner(invalid: np.ndarray, limit: int) -> Set[IndexLabel]:
         limit = min(limit, N)
         windowed = _rolling_window(invalid, limit + 1).all(1)
         idx = set(np.where(windowed)[0] + limit) | set(
@@ -787,7 +846,7 @@ def inner(invalid, limit):
     return f_idx & b_idx
 
 
-def _rolling_window(a: np.ndarray, window: int):
+def _rolling_window(a: np.ndarray, window: int) -> np.ndarray:
     """
     [True, True, False, True, False], 2 ->