diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ec88eb817b3f8..f7f575ea1c29c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -60,7 +60,7 @@ from pandas.core.indexers import validate_indices if TYPE_CHECKING: - from pandas import Categorical, DataFrame, Series + from pandas import Categorical, DataFrame, Index, Series _shared_docs: Dict[str, str] = {} @@ -533,7 +533,7 @@ def factorize( sort: bool = False, na_sentinel: Optional[int] = -1, size_hint: Optional[int] = None, -) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]: +) -> Tuple[np.ndarray, Union[np.ndarray, "Index"]]: """ Encode the object as an enumerated type or categorical variable. diff --git a/pandas/core/base.py b/pandas/core/base.py index b3366cca37617..5f724d9e89d05 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -269,12 +269,14 @@ def __getitem__(self, key): return self._gotitem(list(key), ndim=2) elif not getattr(self, "as_index", False): - if key not in self.obj.columns: + # error: "SelectionMixin" has no attribute "obj" [attr-defined] + if key not in self.obj.columns: # type: ignore[attr-defined] raise KeyError(f"Column not found: {key}") return self._gotitem(key, ndim=2) else: - if key not in self.obj: + # error: "SelectionMixin" has no attribute "obj" [attr-defined] + if key not in self.obj: # type: ignore[attr-defined] raise KeyError(f"Column not found: {key}") return self._gotitem(key, ndim=1) @@ -919,10 +921,9 @@ def _map_values(self, mapper, na_action=None): # "astype" [attr-defined] values = self.astype(object)._values # type: ignore[attr-defined] if na_action == "ignore": - - def map_f(values, f): - return lib.map_infer_mask(values, f, isna(values).view(np.uint8)) - + map_f = lambda values, f: lib.map_infer_mask( + values, f, isna(values).view(np.uint8) + ) elif na_action is None: map_f = lib.map_infer else: diff --git a/pandas/core/common.py b/pandas/core/common.py index d5c078b817ca0..09ed2005cd028 100644 --- 
a/pandas/core/common.py +++ b/pandas/core/common.py @@ -469,7 +469,8 @@ def convert_to_list_like( inputs are returned unmodified whereas others are converted to list. """ if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): - return values + # np.ndarray resolving as Any gives a false positive + return values # type: ignore[return-value] elif isinstance(values, abc.Iterable) and not isinstance(values, str): return list(values) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 8a8b0d564ea49..5ad3e78a76866 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -1,9 +1,10 @@ """ Core eval alignment algorithms. """ +from __future__ import annotations from functools import partial, wraps -from typing import Dict, Optional, Sequence, Tuple, Type, Union +from typing import TYPE_CHECKING, Dict, Optional, Sequence, Tuple, Type, Union import warnings import numpy as np @@ -17,13 +18,16 @@ import pandas.core.common as com from pandas.core.computation.common import result_type_many +if TYPE_CHECKING: + from pandas.core.indexes.api import Index + def _align_core_single_unary_op( term, -) -> Tuple[Union[partial, Type[FrameOrSeries]], Optional[Dict[str, int]]]: +) -> Tuple[Union[partial, Type[FrameOrSeries]], Optional[Dict[str, Index]]]: typ: Union[partial, Type[FrameOrSeries]] - axes: Optional[Dict[str, int]] = None + axes: Optional[Dict[str, Index]] = None if isinstance(term.value, np.ndarray): typ = partial(np.asanyarray, dtype=term.value.dtype) @@ -36,8 +40,8 @@ def _align_core_single_unary_op( def _zip_axes_from_type( - typ: Type[FrameOrSeries], new_axes: Sequence[int] -) -> Dict[str, int]: + typ: Type[FrameOrSeries], new_axes: Sequence[Index] +) -> Dict[str, Index]: return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)} diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index 86e125b6b909b..a1bebc92046ae 100644 --- 
a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -8,6 +8,8 @@ import tokenize from typing import Iterator, Tuple +from pandas._typing import Label + # A token value Python's tokenizer probably will never use. BACKTICK_QUOTED_STRING = 100 @@ -91,7 +93,7 @@ def clean_backtick_quoted_toks(tok: Tuple[int, str]) -> Tuple[int, str]: return toknum, tokval -def clean_column_name(name: str) -> str: +def clean_column_name(name: "Label") -> "Label": """ Function to emulate the cleaning of a backtick quoted name. @@ -102,12 +104,12 @@ def clean_column_name(name: str) -> str: Parameters ---------- - name : str + name : hashable Name to be cleaned. Returns ------- - name : str + name : hashable Returns the name after tokenizing and cleaning. Notes diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 7901e150a7ff4..f9ebe3f1e185e 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -351,7 +351,7 @@ def array( return result -def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike: +def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayLike]: """ Extract the ndarray or ExtensionArray from a Series or Index. 
@@ -399,9 +399,7 @@ def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike: if extract_numpy and isinstance(obj, ABCPandasArray): obj = obj.to_numpy() - # error: Incompatible return value type (got "Index", expected "ExtensionArray") - # error: Incompatible return value type (got "Series", expected "ExtensionArray") - return obj # type: ignore[return-value] + return obj def sanitize_array( diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 7d2549713c6bc..34891180906bb 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -1,4 +1,11 @@ """ define generic base classes for pandas objects """ +from __future__ import annotations + +from typing import TYPE_CHECKING, Type, cast + +if TYPE_CHECKING: + from pandas import DataFrame, Series + from pandas.core.generic import NDFrame # define abstract base classes to enable isinstance type checking on our @@ -53,9 +60,17 @@ def _check(cls, inst) -> bool: }, ) -ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe")) -ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",)) -ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",)) +ABCNDFrame = cast( + "Type[NDFrame]", + create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe")), +) +ABCSeries = cast( + "Type[Series]", + create_pandas_abc_type("ABCSeries", "_typ", ("series",)), +) +ABCDataFrame = cast( + "Type[DataFrame]", create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",)) +) ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")) ABCDatetimeArray = create_pandas_abc_type("ABCDatetimeArray", "_typ", ("datetimearray")) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4e6aba1961b64..fee143816164d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -511,7 +511,7 @@ def _get_axis_resolvers(self, axis: str) -> Dict[str, Union[Series, MultiIndex]] return d @final 
- def _get_index_resolvers(self) -> Dict[str, Union[Series, MultiIndex]]: + def _get_index_resolvers(self) -> Dict[Label, Union[Series, MultiIndex]]: from pandas.core.computation.parsing import clean_column_name d: Dict[str, Union[Series, MultiIndex]] = {} @@ -521,7 +521,7 @@ def _get_index_resolvers(self) -> Dict[str, Union[Series, MultiIndex]]: return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)} @final - def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]: + def _get_cleaned_column_resolvers(self) -> Dict[Label, Series]: """ Return the special character free column resolvers of a dataframe. @@ -532,7 +532,6 @@ def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]: from pandas.core.computation.parsing import clean_column_name if isinstance(self, ABCSeries): - self = cast("Series", self) return {clean_column_name(self.name): self} return { diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a8951e342e0da..47283375beaa3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2016,7 +2016,7 @@ def ravel(i): raise ValueError("Incompatible indexer with Series") - def _align_frame(self, indexer, df: ABCDataFrame): + def _align_frame(self, indexer, df: "DataFrame"): is_frame = self.ndim == 2 if isinstance(indexer, tuple): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index bcafa2c2fdca7..18c5452d9d6cf 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -369,7 +369,7 @@ def extract_index(data) -> Index: index = Index([]) elif len(data) > 0: raw_lengths = [] - indexes = [] + indexes: List[Union[List[Label], Index]] = [] have_raw_arrays = False have_series = False diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 77b1076920f20..a05c4270ff0c3 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -3,11 +3,21 @@ """ from collections import abc 
-from typing import TYPE_CHECKING, Iterable, List, Mapping, Type, Union, cast, overload +from typing import ( + TYPE_CHECKING, + Iterable, + List, + Mapping, + Optional, + Type, + Union, + cast, + overload, +) import numpy as np -from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Label +from pandas._typing import FrameOrSeriesUnion, Label from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -295,7 +305,7 @@ class _Concatenator: def __init__( self, - objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]], + objs: Union[Iterable["NDFrame"], Mapping[Label, "NDFrame"]], axis=0, join: str = "outer", keys=None, @@ -366,7 +376,7 @@ def __init__( # get the sample # want the highest ndim that we have, and must be non-empty # unless all objs are empty - sample = None + sample: Optional["NDFrame"] = None if len(ndims) > 1: max_ndim = max(ndims) for obj in objs: @@ -436,6 +446,8 @@ def __init__( # to line up if self._is_frame and axis == 1: name = 0 + # mypy needs to know sample is a DataFrame or Series, not just an NDFrame + sample = cast("FrameOrSeriesUnion", sample) obj = sample._constructor({name: obj}) self.objs.append(obj) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 7d6a2bf1d776d..9d16beba669ca 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -157,11 +157,10 @@ def __init__(self, data): array = data.array self._array = array + self._index = self._name = None if isinstance(data, ABCSeries): self._index = data.index self._name = data.name - else: - self._index = self._name = None # ._values.categories works for both Series/Index self._parent = data._values.categories if self._is_categorical else data diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 938f1846230cb..6ebf610587d30 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -1,5 +1,6 @@ """Common utility functions for rolling
operations""" from collections import defaultdict +from typing import cast import warnings import numpy as np @@ -109,6 +110,9 @@ def dataframe_from_int_dict(data, frame_template): # set the index and reorder if arg2.columns.nlevels > 1: + # mypy needs to know columns is a MultiIndex, Index doesn't + # have levels attribute + arg2.columns = cast(MultiIndex, arg2.columns) result.index = MultiIndex.from_product( arg2.columns.levels + [result_index] ) diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index bec1f48f5e64a..b8fc93d9aba93 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -7,7 +7,7 @@ import matplotlib.ticker as ticker import numpy as np -from pandas._typing import FrameOrSeries +from pandas._typing import FrameOrSeriesUnion from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries @@ -30,7 +30,9 @@ def format_date_labels(ax: "Axes", rot): fig.subplots_adjust(bottom=0.2) -def table(ax, data: FrameOrSeries, rowLabels=None, colLabels=None, **kwargs) -> "Table": +def table( + ax, data: FrameOrSeriesUnion, rowLabels=None, colLabels=None, **kwargs +) -> "Table": if isinstance(data, ABCSeries): data = data.to_frame() elif isinstance(data, ABCDataFrame):