diff --git a/pandas/_typing.py b/pandas/_typing.py
index e1b6a5e2e6876..977812e5286af 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -75,3 +75,5 @@
 # to maintain type information across generic functions and parametrization
 T = TypeVar("T")
+FuncType = Callable[..., Any]
+F = TypeVar("F", bound=FuncType)
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 3547a33ea357b..6570e0782a69a 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -12,6 +12,8 @@
 import sys
 import warnings
 
+from pandas._typing import F
+
 PY37 = sys.version_info >= (3, 7)
 PY38 = sys.version_info >= (3, 8)
 PYPY = platform.python_implementation() == "PyPy"
@@ -25,7 +27,7 @@
 # found at https://bitbucket.org/gutworth/six
 
 
-def set_function_name(f, name, cls):
+def set_function_name(f: F, name: str, cls) -> F:
     """
     Bind the name/qualname attributes of the function.
     """
diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py
index a84dbb4a661e4..7bca4d278aed6 100644
--- a/pandas/compat/chainmap.py
+++ b/pandas/compat/chainmap.py
@@ -1,4 +1,6 @@
-from typing import ChainMap, MutableMapping, TypeVar, cast
+from typing import ChainMap, List, MutableMapping, TypeVar
+
+from pandas._typing import T
 
 _KT = TypeVar("_KT")
 _VT = TypeVar("_VT")
@@ -11,13 +13,17 @@ class DeepChainMap(ChainMap[_KT, _VT]):
     Only works when all passed mapping are mutable.
     """
 
+    # error: Incompatible types in assignment (expression has type
+    # "List[MutableMapping[_KT, _VT]]", base class "ChainMap" defined the type
+    # as "List[Mapping[_KT, _VT]]")  [assignment]
+    maps: List[MutableMapping[_KT, _VT]]  # type: ignore
+
     def __setitem__(self, key: _KT, value: _VT) -> None:
         for mapping in self.maps:
-            mutable_mapping = cast(MutableMapping[_KT, _VT], mapping)
-            if key in mutable_mapping:
-                mutable_mapping[key] = value
+            if key in mapping:
+                mapping[key] = value
                 return
-        cast(MutableMapping[_KT, _VT], self.maps[0])[key] = value
+        self.maps[0][key] = value
 
     def __delitem__(self, key: _KT) -> None:
         """
@@ -27,8 +33,11 @@ def __delitem__(self, key: _KT) -> None:
             If `key` doesn't exist.
         """
         for mapping in self.maps:
-            mutable_mapping = cast(MutableMapping[_KT, _VT], mapping)
             if key in mapping:
-                del mutable_mapping[key]
+                del mapping[key]
                 return
         raise KeyError(key)
+
+    # FIXME: return type of new_child incorrect in typeshed
+    def new_child(self: T, m) -> T:  # type: ignore
+        return super().new_child(m)  # type: ignore
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 62a3808d36ba2..7388cd7cac150 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -4,7 +4,7 @@
 """
 import operator
 from textwrap import dedent
-from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Callable, Dict, Optional, Tuple, Union
 from warnings import catch_warnings, simplefilter, warn
 
 import numpy as np
@@ -1074,6 +1074,9 @@ def __init__(self, obj, n: int, keep: str):
         if self.keep not in ("first", "last", "all"):
             raise ValueError('keep must be either "first", "last" or "all"')
 
+    def compute(self, method):
+        raise NotImplementedError
+
     def nlargest(self):
         return self.compute("nlargest")
 
@@ -1430,7 +1433,7 @@ def _take_nd_object(arr, indexer, out, axis: int, fill_value, mask_info):
 
 def _get_take_nd_function(
     ndim: int, arr_dtype, out_dtype, axis: int = 0, mask_info=None
-):
+) -> Callable:
     if ndim <= 2:
         tup = (arr_dtype.name, out_dtype.name)
         if ndim == 1:
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 22ce5a6f87a43..553cff8d1ad34 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -1,5 +1,6 @@
 from operator import le, lt
 import textwrap
+from typing import Any, Sequence, Tuple
 
 import numpy as np
 
@@ -432,30 +433,33 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
             ),
         )
     )
-    def from_tuples(cls, data, closed="right", copy=False, dtype=None):
+    def from_tuples(
+        cls, data: Sequence[Tuple[Any, Any]], closed="right", copy=False, dtype=None
+    ):
+        left: Sequence
+        right: Sequence
         if len(data):
             left, right = [], []
+            for d in data:
+                if isna(d):
+                    lhs = rhs = np.nan
+                else:
+                    name = cls.__name__
+                    try:
+                        # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
+                        lhs, rhs = d
+                    except ValueError as err:
+                        msg = f"{name}.from_tuples requires tuples of length 2, got {d}"
+                        raise ValueError(msg) from err
+                    except TypeError as err:
+                        msg = f"{name}.from_tuples received an invalid item, {d}"
+                        raise TypeError(msg) from err
+                left.append(lhs)
+                right.append(rhs)
         else:
             # ensure that empty data keeps input dtype
             left = right = data
 
-        for d in data:
-            if isna(d):
-                lhs = rhs = np.nan
-            else:
-                name = cls.__name__
-                try:
-                    # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
-                    lhs, rhs = d
-                except ValueError as err:
-                    msg = f"{name}.from_tuples requires tuples of length 2, got {d}"
-                    raise ValueError(msg) from err
-                except TypeError as err:
-                    msg = f"{name}.from_tuples received an invalid item, {d}"
-                    raise TypeError(msg) from err
-            left.append(lhs)
-            right.append(rhs)
-
         return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)
 
     def _validate(self):
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index a98875ace09aa..89499a4f3ab67 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -4,7 +4,7 @@
 from collections import abc
 import numbers
 import operator
-from typing import Any, Callable, Union
+from typing import Any, Callable, Type, TypeVar, Union
 import warnings
 
 import numpy as np
@@ -51,6 +51,8 @@
 
 import pandas.io.formats.printing as printing
 
+_SparseArrayT = TypeVar("_SparseArrayT", bound="SparseArray")
+
 # ----------------------------------------------------------------------------
 # Array
 
@@ -396,8 +398,11 @@ def __init__(
 
     @classmethod
     def _simple_new(
-        cls, sparse_array: np.ndarray, sparse_index: SparseIndex, dtype: SparseDtype
-    ) -> "SparseArray":
+        cls: Type[_SparseArrayT],
+        sparse_array: np.ndarray,
+        sparse_index: SparseIndex,
+        dtype: SparseDtype,
+    ) -> _SparseArrayT:
         new = object.__new__(cls)
         new._sparse_index = sparse_index
         new._sparse_values = sparse_array
@@ -821,11 +826,12 @@ def _get_val_at(self, loc):
             val = com.maybe_box_datetimelike(val, self.sp_values.dtype)
         return val
 
-    def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray":
+    def take(self, indices, allow_fill: bool = False, fill_value=None) -> "SparseArray":
         if is_scalar(indices):
             raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.")
         indices = np.asarray(indices, dtype=np.int32)
 
+        result: Union[np.ndarray, "SparseArray"]
         if indices.size == 0:
             result = np.array([], dtype="object")
             kwargs = {"dtype": self.dtype}
@@ -1386,8 +1392,10 @@ def __abs__(self):
     # ------------------------------------------------------------------------
 
     @classmethod
-    def _create_unary_method(cls, op) -> Callable[["SparseArray"], "SparseArray"]:
-        def sparse_unary_method(self) -> "SparseArray":
+    def _create_unary_method(
+        cls: Type[_SparseArrayT], op
+    ) -> Callable[[_SparseArrayT], _SparseArrayT]:
+        def sparse_unary_method(self: _SparseArrayT) -> _SparseArrayT:
             fill_value = op(np.array(self.fill_value)).item()
             values = op(self.sp_values)
             dtype = SparseDtype(values.dtype, fill_value)
@@ -1479,14 +1487,14 @@ def cmp_method(self, other):
 
     @classmethod
     def _add_unary_ops(cls):
-        cls.__pos__ = cls._create_unary_method(operator.pos)
-        cls.__neg__ = cls._create_unary_method(operator.neg)
-        cls.__invert__ = cls._create_unary_method(operator.invert)
+        setattr(cls, "__pos__", cls._create_unary_method(operator.pos))
+        setattr(cls, "__neg__", cls._create_unary_method(operator.neg))
+        setattr(cls, "__invert__", cls._create_unary_method(operator.invert))
 
     @classmethod
     def _add_comparison_ops(cls):
-        cls.__and__ = cls._create_comparison_method(operator.and_)
-        cls.__or__ = cls._create_comparison_method(operator.or_)
+        setattr(cls, "__and__", cls._create_comparison_method(operator.and_))
+        setattr(cls, "__or__", cls._create_comparison_method(operator.or_))
         cls.__xor__ = cls._create_arithmetic_method(operator.xor)
         super()._add_comparison_ops()
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 5945d8a4b432d..734ea8eba65d6 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -4,7 +4,17 @@
 import builtins
 import textwrap
-from typing import Any, Dict, FrozenSet, List, Optional, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    FrozenSet,
+    List,
+    Optional,
+    Tuple,
+    Union,
+)
 
 import numpy as np
 
@@ -34,6 +44,9 @@
 from pandas.core.construction import create_series_with_explicit_dtype
 import pandas.core.nanops as nanops
 
+if TYPE_CHECKING:
+    from pandas import DataFrame, Series  # noqa: F401
+
 _shared_docs: Dict[str, str] = dict()
 _indexops_doc_kwargs = dict(
     klass="IndexOpsMixin",
@@ -244,7 +257,7 @@ def _gotitem(self, key, ndim: int, subset=None):
         """
         raise AbstractMethodError(self)
 
-    def aggregate(self, func, *args, **kwargs):
+    def aggregate(self, func, **kwargs):
         raise AbstractMethodError(self)
 
     agg = aggregate
@@ -279,13 +292,15 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs):
             f"'{arg}' is not a valid function for '{type(self).__name__}' object"
         )
 
-    def _aggregate(self, arg, *args, **kwargs):
+    def _aggregate(
+        self, arg: Union[str, Dict], *args, **kwargs
+    ) -> Tuple[Optional[Union[Dict, "Series", "DataFrame"]], Optional[bool]]:
         """
         provide an implementation for the aggregators
 
         Parameters
         ----------
-        arg : string, dict, function
+        arg : str, dict, function
         *args : args to pass on to the function
         **kwargs : kwargs to pass on to the function
@@ -318,7 +333,7 @@ def _aggregate(self, arg, *args, **kwargs):
             # eg. {'A' : ['mean']}, normalize all to
             # be list-likes
             if any(is_aggregator(x) for x in arg.values()):
-                new_arg = {}
+                new_arg: Dict[Any, Union[Tuple, List, Dict]] = {}
                 for k, v in arg.items():
                     if not isinstance(v, (tuple, list, dict)):
                         new_arg[k] = [v]
@@ -374,7 +389,7 @@ def _agg_2dim(how):
                 colg = self._gotitem(self._selection, ndim=2, subset=obj)
                 return colg.aggregate(how)
 
-            def _agg(arg, func):
+            def _agg(arg: Dict, func: Callable) -> Dict:
                 """
                 run the aggregations over the arg with func
                 return a dict
@@ -386,7 +401,7 @@ def _agg(arg, func):
 
             # set the final keys
             keys = list(arg.keys())
-            result = {}
+            result: Any = {}
 
             if self._selection is not None:
@@ -460,7 +475,7 @@ def is_any_frame() -> bool:
                 return result, True
 
             # fall thru
-            from pandas import DataFrame, Series
+            from pandas import DataFrame, Series  # noqa: F811
 
             try:
                 result = DataFrame(result)
@@ -546,7 +561,7 @@ def _aggregate_multiple_funcs(self, arg, _axis):
 
             # we are concatting non-NDFrame objects,
             # e.g. a list of scalars
-            from pandas import Series
+            from pandas import Series  # noqa: F811
 
             result = Series(results, index=keys, name=self.name)
             if is_nested_object(result):
diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py
index 7f93472c766d7..5b50964e83faf 100644
--- a/pandas/core/computation/expressions.py
+++ b/pandas/core/computation/expressions.py
@@ -6,6 +6,7 @@
 
 """
 
+from typing import Callable, List, Set
 import warnings
 
 import numpy as np
@@ -20,10 +21,10 @@
     import numexpr as ne
 
 _TEST_MODE = None
-_TEST_RESULT = None
+_TEST_RESULT: List[bool]
 _USE_NUMEXPR = _NUMEXPR_INSTALLED
-_evaluate = None
-_where = None
+_evaluate: Callable
+_where: Callable
 
 # the set of dtypes that we will allow pass to numexpr
 _ALLOWED_DTYPES = {
@@ -74,7 +75,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):
         # required min elements (otherwise we are adding overhead)
         if np.prod(a.shape) > _MIN_ELEMENTS:
             # check for dtype compatibility
-            dtypes = set()
+            dtypes: Set[str] = set()
             for o in [a, b]:
                 # Series implements dtypes, check for dimension count as well
                 if hasattr(o, "dtypes") and o.ndim > 1:
@@ -213,7 +214,7 @@ def where(cond, a, b, use_numexpr=True):
     return _where(cond, a, b) if use_numexpr else _where_standard(cond, a, b)
 
 
-def set_test_mode(v=True):
+def set_test_mode(v: bool = True) -> None:
     """
     Keeps track of whether numexpr was used. Stores an additional ``True``
     for every successful use of evaluate with numexpr since the last
diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py
index 15d9987310f18..3050312d14320 100644
--- a/pandas/core/computation/pytables.py
+++ b/pandas/core/computation/pytables.py
@@ -2,7 +2,7 @@
 
 import ast
 from functools import partial
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
 
 import numpy as np
 
@@ -62,7 +62,7 @@ def _resolve_name(self):
         except UndefinedVariableError:
             return self.name
 
-    # read-only property overwriting read/write property
+    # error: Read-only property cannot override read-write property  [misc]
     @property  # type: ignore
     def value(self):
         return self._value
@@ -183,6 +183,7 @@ def convert_value(self, v) -> "TermValue":
         """
 
         def stringify(value):
+            encoder: Callable
             if self.encoding is not None:
                 encoder = partial(pprint_thing_encoded, encoding=self.encoding)
             else:
diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py
index 83bf92ad737e4..210e3b2ebf5d0 100644
--- a/pandas/core/computation/scope.py
+++ b/pandas/core/computation/scope.py
@@ -9,7 +9,7 @@
 import pprint
 import struct
 import sys
-from typing import List
+from typing import Dict, List, Optional, Union
 
 import numpy as np
 
@@ -108,8 +108,14 @@ class Scope:
     __slots__ = ["level", "scope", "target", "resolvers", "temps"]
 
     def __init__(
-        self, level, global_dict=None, local_dict=None, resolvers=(), target=None
+        self,
+        level,
+        global_dict=None,
+        local_dict: Optional[Union["Scope", Dict]] = None,
+        resolvers=(),
+        target=None,
     ):
+        self.resolvers: DeepChainMap
         self.level = level + 1
 
         # shallow copy because we don't want to keep filling this up with what
@@ -139,7 +145,7 @@ def __init__(
         if isinstance(local_dict, Scope):
             resolvers += tuple(local_dict.resolvers.maps)
         self.resolvers = DeepChainMap(*resolvers)
-        self.temps = {}
+        self.temps: Dict[str, object] = {}
 
     def __repr__(self) -> str:
         scope_keys = _get_pretty_string(list(self.scope.keys()))
@@ -161,7 +167,7 @@ def has_resolvers(self) -> bool:
         """
         return bool(len(self.resolvers))
 
-    def resolve(self, key: str, is_local: bool):
+    def resolve(self, key: str, is_local: bool) -> object:
         """
         Resolve a variable name in a possibly local context.
@@ -271,7 +277,7 @@ def _update(self, level: int):
         finally:
             del stack[:], stack
 
-    def add_tmp(self, value) -> str:
+    def add_tmp(self, value: object) -> str:
         """
         Add a temporary variable to the scope.
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 7dda6850ba4f7..e352b9d08b559 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -75,7 +75,7 @@
 
 if TYPE_CHECKING:
     from pandas import Series
-    from pandas.core.arrays import ExtensionArray  # noqa: F401
+    from pandas.core.arrays.base import ExtensionArray  # noqa: F401
 
 _int8_max = np.iinfo(np.int8).max
 _int16_max = np.iinfo(np.int16).max
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index b2301ab0190c7..1a26ff98ca94f 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -2,7 +2,7 @@
 Common type operations.
 """
 
-from typing import Any, Callable, Union
+from typing import Any, Callable, Optional, Union, cast
 import warnings
 
 import numpy as np
@@ -63,7 +63,7 @@
 _INT64_DTYPE = np.dtype(np.int64)
 
 # oh the troubles to reduce import time
-_is_scipy_sparse = None
+_is_scipy_sparse: Optional[Callable] = None
 
 ensure_float64 = algos.ensure_float64
 ensure_float32 = algos.ensure_float32
@@ -160,10 +160,12 @@ def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.array:
     """
     # TODO: GH27506 potential bug with ExtensionArrays
    try:
+        # error: Unexpected keyword argument "casting" for "astype"
         return arr.astype("int64", copy=copy, casting="safe")  # type: ignore
     except TypeError:
         pass
     try:
+        # error: Unexpected keyword argument "casting" for "astype"
         return arr.astype("uint64", copy=copy, casting="safe")  # type: ignore
     except TypeError:
         if is_extension_array_dtype(arr.dtype):
@@ -324,6 +326,8 @@ def is_scipy_sparse(arr) -> bool:
     if _is_scipy_sparse is None:
         try:
             from scipy.sparse import issparse as _is_scipy_sparse
+
+            _is_scipy_sparse = cast(Callable, _is_scipy_sparse)
         except ImportError:
             _is_scipy_sparse = lambda _: False
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c3018861bce57..7501f2064cd61 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -29,6 +29,7 @@
     Set,
     Tuple,
     Type,
+    TypeVar,
     Union,
     cast,
 )
@@ -135,6 +136,8 @@
 from pandas.io.formats.info import info
 import pandas.plotting
 
+_DataFrameT = TypeVar("_DataFrameT", bound="DataFrame")
+
 if TYPE_CHECKING:
     from pandas.core.groupby.generic import DataFrameGroupBy
     from pandas.io.formats.style import Styler
@@ -773,7 +776,7 @@ def to_string(
         max_rows: Optional[int] = None,
         min_rows: Optional[int] = None,
         max_cols: Optional[int] = None,
-        show_dimensions: bool = False,
+        show_dimensions: Union[bool, str] = False,
         decimal: str = ".",
         line_width: Optional[int] = None,
         max_colwidth: Optional[int] = None,
@@ -963,7 +966,9 @@ def iterrows(self) -> Iterable[Tuple[Label, Series]]:
             s = klass(v, index=columns, name=k)
             yield k, s
 
-    def itertuples(self, index=True, name="Pandas"):
+    def itertuples(
+        self, index: bool = True, name: Optional[str] = "Pandas"
+    ) -> Iterable:
         """
         Iterate over DataFrame rows as namedtuples.
@@ -1039,7 +1044,11 @@ def itertuples(self, index=True, name="Pandas"):
         # Python versions before 3.7 support at most 255 arguments to constructors
         can_return_named_tuples = PY37 or len(self.columns) + index < 255
         if name is not None and can_return_named_tuples:
-            itertuple = collections.namedtuple(name, fields, rename=True)
+            # https://github.com/python/mypy/issues/848
+            # error: namedtuple() expects a string literal as the first argument [misc]
+            itertuple = collections.namedtuple(  # type: ignore
+                name, fields, rename=True
+            )
             return map(itertuple._make, zip(*arrays))
 
         # fallback to regular tuples
@@ -1665,13 +1674,13 @@ def from_records(
                 arrays = [data[k] for k in columns]
             else:
                 arrays = []
-                arr_columns = []
+                arr_columns_ = []
                 for k, v in data.items():
                     if k in columns:
-                        arr_columns.append(k)
+                        arr_columns_.append(k)
                         arrays.append(v)
 
-                arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns)
+                arrays, arr_columns = reorder_arrays(arrays, arr_columns_, columns)
 
         elif isinstance(data, (np.ndarray, DataFrame)):
             arrays, columns = to_arrays(data, columns)
@@ -3277,10 +3286,15 @@ def select_dtypes(self, include=None, exclude=None) -> "DataFrame":
         def extract_unique_dtypes_from_dtypes_set(
             dtypes_set: FrozenSet[Dtype], unique_dtypes: np.ndarray
         ) -> List[Dtype]:
+            # error: Argument 1 to "tuple" has incompatible type
+            # "FrozenSet[Union[str, Any, ExtensionDtype]]";
+            # expected "Iterable[Union[type, Tuple[Any, ...]]]"
             extracted_dtypes = [
                 unique_dtype
                 for unique_dtype in unique_dtypes
-                if issubclass(unique_dtype.type, tuple(dtypes_set))  # type: ignore
+                if issubclass(
+                    unique_dtype.type, tuple(dtypes_set)  # type: ignore
+                )
             ]
             return extracted_dtypes
 
@@ -3320,7 +3334,7 @@ def insert(self, loc, column, value, allow_duplicates=False) -> None:
         value = self._sanitize_column(column, value, broadcast=False)
         self._mgr.insert(loc, column, value, allow_duplicates=allow_duplicates)
 
-    def assign(self, **kwargs) -> "DataFrame":
+    def assign(self: _DataFrameT, **kwargs) -> _DataFrameT:
         r"""
         Assign new columns to a DataFrame.
@@ -4203,7 +4217,7 @@ def set_index(
         frame = self.copy()
 
         arrays = []
-        names = []
+        names: List = []
         if append:
             names = list(self.index.names)
             if isinstance(self.index, ABCMultiIndex):
@@ -4449,7 +4463,9 @@ def _maybe_casted_values(index, labels=None):
                     values, _ = maybe_upcast_putmask(values, mask, np.nan)
 
                 if issubclass(values_type, DatetimeLikeArray):
-                    values = values_type(values, dtype=values_dtype)
+                    # TODO: DatetimeLikeArray is a mixin not a base class
+                    # error: Too many arguments for "DatetimeLikeArrayMixin"
+                    values = values_type(values, dtype=values_dtype)  # type: ignore
 
             return values
 
@@ -6866,26 +6882,26 @@ def _gotitem(
         **_shared_doc_kwargs,
     )
     @Appender(_shared_docs["aggregate"])
-    def aggregate(self, func, axis=0, *args, **kwargs):
+    def aggregate(self, func, axis=0, **kwargs):
         axis = self._get_axis_number(axis)
 
         result = None
         try:
-            result, how = self._aggregate(func, axis=axis, *args, **kwargs)
+            result, how = self._aggregate(func, axis=axis, **kwargs)
         except TypeError:
             pass
         if result is None:
-            return self.apply(func, axis=axis, args=args, **kwargs)
+            return self.apply(func, axis=axis, **kwargs)
         return result
 
-    def _aggregate(self, arg, axis=0, *args, **kwargs):
+    def _aggregate(self, arg, axis=0, **kwargs):
         if axis == 1:
             # NDFrame.aggregate returns a tuple, and we need to transpose
             # only result
-            result, how = self.T._aggregate(arg, *args, **kwargs)
+            result, how = self.T._aggregate(arg, **kwargs)
             result = result.T if result is not None else result
             return result, how
-        return super()._aggregate(arg, *args, **kwargs)
+        return super()._aggregate(arg, **kwargs)
 
     agg = aggregate
 
@@ -7253,8 +7269,14 @@ def append(
         )
 
     def join(
-        self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False
-    ) -> "DataFrame":
+        self: _DataFrameT,
+        other,
+        on=None,
+        how: str = "left",
+        lsuffix: str = "",
+        rsuffix: str = "",
+        sort: bool = False,
+    ) -> _DataFrameT:
         """
         Join columns of another DataFrame.
@@ -8277,6 +8299,7 @@ def idxmin(self, axis=0, skipna=True) -> Series:
         """
         axis = self._get_axis_number(axis)
         indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna)
+        assert not isinstance(indices, int)  # needed for mypy
         index = self._get_axis(axis)
         result = [index[i] if i >= 0 else np.nan for i in indices]
         return Series(result, index=self._get_agg_axis(axis))
@@ -8344,6 +8367,7 @@ def idxmax(self, axis=0, skipna=True) -> Series:
         """
         axis = self._get_axis_number(axis)
         indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna)
+        assert not isinstance(indices, int)  # needed for mypy
         index = self._get_axis(axis)
         result = [index[i] if i >= 0 else np.nan for i in indices]
         return Series(result, index=self._get_agg_axis(axis))
@@ -8730,7 +8754,7 @@ def isin(self, values) -> "DataFrame":
 
 def _from_nested_dict(data):
     # TODO: this should be seriously cythonized
-    new_data = collections.defaultdict(dict)
+    new_data: Dict = collections.defaultdict(dict)
     for index, s in data.items():
         for col, v in s.items():
             new_data[col][index] = v
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 47a2b22abe103..9638a96679b10 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -22,6 +22,7 @@
     Tuple,
     Type,
     Union,
+    cast,
 )
 import warnings
 import weakref
@@ -107,6 +108,7 @@
 from pandas.tseries.frequencies import to_offset
 
 if TYPE_CHECKING:
+    from pandas import Series  # noqa: F401
     from pandas.core.resample import Resampler
 
 # goal is to be able to define the docs close to function, while still being
@@ -391,7 +393,7 @@ def _get_block_manager_axis(cls, axis):
             return m - axis
         return axis
 
-    def _get_axis_resolvers(self, axis: str) -> Dict[str, ABCSeries]:
+    def _get_axis_resolvers(self, axis: str) -> Dict[str, Union["Series", MultiIndex]]:
         # index or columns
         axis_index = getattr(self, axis)
         d = dict()
@@ -421,10 +423,10 @@ def _get_axis_resolvers(self, axis: str) -> Dict[str, ABCSeries]:
         d[axis] = dindex
         return d
 
-    def _get_index_resolvers(self) -> Dict[str, ABCSeries]:
+    def _get_index_resolvers(self) -> Dict[str, Union["Series", MultiIndex]]:
         from pandas.core.computation.parsing import clean_column_name
 
-        d: Dict[str, ABCSeries] = {}
+        d: Dict[str, Union["Series", MultiIndex]] = {}
         for axis_name in self._AXIS_ORDERS:
             d.update(self._get_axis_resolvers(axis_name))
 
@@ -1861,7 +1863,7 @@ def __setstate__(self, state):
         elif len(state) == 2:
             raise NotImplementedError("Pre-0.12 pickles are no longer supported")
 
-        self._item_cache = {}
+        self._item_cache: Dict = {}
 
     # ----------------------------------------------------------------------
     # Rendering Methods
@@ -4598,14 +4600,15 @@ def filter(
             return self.reindex(**{name: [r for r in items if r in labels]})
         elif like:
 
-            def f(x):
+            def f(x) -> bool:
+                assert like is not None  # needed for mypy
                 return like in ensure_str(x)
 
             values = labels.map(f)
             return self.loc(axis=axis)[values]
         elif regex:
 
-            def f(x):
+            def f(x) -> bool:
                 return matcher.search(ensure_str(x)) is not None
 
             matcher = re.compile(regex)
@@ -5423,7 +5426,7 @@ def dtypes(self):
         string              object
         dtype: object
         """
-        from pandas import Series
+        from pandas import Series  # noqa: F811
 
         return Series(self._mgr.get_dtypes(), index=self._info_axis, dtype=np.object_)
 
@@ -6508,8 +6511,11 @@ def replace(
                 to_replace = regex
                 regex = True
 
-            items = list(to_replace.items())
-            keys, values = zip(*items) if items else ([], [])
+            items = list(cast(dict, to_replace).items())
+            if items:
+                keys, values = zip(*items)
+            else:
+                keys, values = ([], [])
 
             are_mappings = [is_dict_like(v) for v in values]
@@ -7018,7 +7024,7 @@ def asof(self, where, subset=None):
 
             if where < start:
                 if not is_series:
-                    from pandas import Series
+                    from pandas import Series  # noqa: F811
 
                     return Series(index=self.columns, name=where, dtype=np.float64)
                 return np.nan
@@ -11181,7 +11187,13 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
             axis = self._get_axis_number(axis)
 
         if axis == 1:
-            return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T
+            # pandas\core\generic.py:11099:20: error: "cum_func" gets multiple values
+            # for keyword argument "axis"  [misc]
+            # pandas\core\generic.py:11099:20: error: "cum_func" gets multiple values
+            # for keyword argument "skipna"  [misc]
+            return cum_func(  # type: ignore
+                self.T, axis=0, skipna=skipna, *args, **kwargs
+            ).T
 
         def block_accum_func(blk_values):
             values = blk_values.T if hasattr(blk_values, "T") else blk_values
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 208cbfc5b06d6..5375cdd2036bd 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -9,7 +9,6 @@
 import copy
 from functools import partial
 from textwrap import dedent
-import typing
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -22,6 +21,7 @@
     Sequence,
     Tuple,
     Type,
+    TypeVar,
     Union,
     cast,
 )
@@ -88,7 +88,7 @@
 # TODO: validate types on ScalarResult and move to _typing
 # Blocked from using by https://github.com/python/mypy/issues/1484
 # See note at _mangle_lambda_list
-ScalarResult = typing.TypeVar("ScalarResult")
+ScalarResult = TypeVar("ScalarResult")
 
 
 def generate_property(name: str, klass: Type[FrameOrSeries]):
@@ -450,7 +450,7 @@ def _get_index() -> Index:
         return self._reindex_output(result)
 
     def _aggregate_named(self, func, *args, **kwargs):
-        result = {}
+        result: Dict = {}
 
         for name, group in self:
             group.name = name
@@ -531,7 +531,7 @@ def _transform_fast(self, result, func_nm: str) -> Series:
             out = maybe_cast_result(out, self.obj, how=func_nm)
         return Series(out, index=self.obj.index, name=self.obj.name)
 
-    def filter(self, func, dropna=True, *args, **kwargs):
+    def filter(self, func: Callable, dropna: bool = True, *args, **kwargs):
         """
         Return a copy of a Series excluding elements from groups that
         do not satisfy the boolean criterion specified by func.
@@ -566,8 +566,8 @@ def filter(self, func, dropna=True, *args, **kwargs):
         wrapper = lambda x: func(x, *args, **kwargs)
 
         # Interpret np.nan as False.
-        def true_and_notna(x, *args, **kwargs) -> bool:
-            b = wrapper(x, *args, **kwargs)
+        def true_and_notna(x) -> bool:
+            b = wrapper(x)
             return b and notna(b)
 
         try:
@@ -1240,9 +1240,10 @@ def first_not_none(values):
                     # this is to silence a DeprecationWarning
                     # TODO: Remove when default dtype of empty Series is object
                     kwargs = v._construct_axes_dict()
+                    backup: NDFrame
                     if v._constructor is Series:
                         backup = create_series_with_explicit_dtype(
-                            **kwargs, dtype_if_empty=object
+                            dtype_if_empty=object, **kwargs
                         )
                     else:
                         backup = v._constructor(**kwargs)
@@ -1870,7 +1871,10 @@ def groupby_series(obj, col=None):
 boxplot = boxplot_frame_groupby
 
 
-def _recast_datetimelike_result(result: DataFrame) -> DataFrame:
+_T = TypeVar("_T", bound=DataFrame)
+
+
+def _recast_datetimelike_result(result: _T) -> _T:
     """
     If we have date/time like in the original, then coerce dates
     as we are stacking can easily have object dtypes here.
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 7a7ac58b9d11b..36e56e9d88bff 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -328,7 +328,7 @@ def f(self):
             f.__name__ = "plot"
             return self._groupby.apply(f)
 
-    def __getattr__(self, name: str):
+    def __getattr__(self, name):
         def attr(*args, **kwargs):
             def f(self):
                 return getattr(self.plot, name)(*args, **kwargs)
@@ -573,7 +573,7 @@ def _set_result_index_ordered(self, result):
     def _dir_additions(self):
         return self.obj._dir_additions() | self._apply_whitelist
 
-    def __getattr__(self, attr: str):
+    def __getattr__(self, attr):
         if attr in self._internal_names_set:
             return object.__getattribute__(self, attr)
         if attr in self.obj:
@@ -2373,8 +2373,6 @@ def head(self, n=5):
         from the original DataFrame with original index and order preserved
         (``as_index`` flag is ignored).
 
-        Does not work for negative values of `n`.
-
         Returns
         -------
         Series or DataFrame
@@ -2388,10 +2386,6 @@ def head(self, n=5):
            A  B
         0  1  2
         2  5  6
-        >>> df.groupby('A').head(-1)
-        Empty DataFrame
-        Columns: [A, B]
-        Index: []
         """
         self._reset_group_selection()
         mask = self._cumcount_array() < n
@@ -2407,8 +2401,6 @@ def tail(self, n=5):
         from the original DataFrame with original index and order preserved
         (``as_index`` flag is ignored).
 
-        Does not work for negative values of `n`.
-
         Returns
         -------
         Series or DataFrame
@@ -2422,10 +2414,6 @@ def tail(self, n=5):
            A  B
         1  a  2
         3  b  2
-        >>> df.groupby('A').tail(-1)
-        Empty DataFrame
-        Columns: [A, B]
-        Index: []
         """
         self._reset_group_selection()
         mask = self._cumcount_array(ascending=False) < n
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 2f50845fda4dc..358cc488bc1c4 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -3,7 +3,7 @@
 split-apply-combine paradigm.
 """
 
-from typing import Dict, Hashable, List, Optional, Tuple
+from typing import Dict, Generic, Hashable, List, Optional, Tuple
 
 import numpy as np
 
@@ -33,7 +33,7 @@
 from pandas.io.formats.printing import pprint_thing
 
 
-class Grouper:
+class Grouper(Generic[FrameOrSeries]):
     """
     A Grouper allows the user to specify a groupby instruction for an object.
@@ -108,7 +108,7 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
         self.sort = sort
 
         self.grouper = None
-        self.obj = None
+        self.obj: Optional[FrameOrSeries] = None
         self.indexer = None
         self.binner = None
         self._grouper = None
@@ -130,6 +130,7 @@ def _get_grouper(self, obj, validate: bool = True):
         a tuple of binner, grouper, obj (possibly sorted)
         """
         self._set_grouper(obj)
+        assert self.obj is not None
         self.grouper, _, self.obj = get_grouper(
             self.obj,
             [self.key],
@@ -151,8 +152,6 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
         sort : bool, default False
             whether the resulting grouper should be sorted
         """
-        assert obj is not None
-
         if self.key is not None and self.level is not None:
             raise ValueError("The Grouper cannot specify both a key and a level!")
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 8d535374a083f..e99b5413df23b 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -7,7 +7,7 @@
 """
 
 import collections
-from typing import List, Optional, Sequence, Tuple, Type
+from typing import Any, Dict, Generic, List, Optional, Sequence, Tuple, Type
 
 import numpy as np
 
@@ -42,7 +42,6 @@
 from pandas.core.base import SelectionMixin
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
-from pandas.core.generic import NDFrame
 from pandas.core.groupby import base, grouper
 from pandas.core.indexes.api import Index, MultiIndex, ensure_index
 from pandas.core.series import Series
@@ -126,7 +125,9 @@ def get_iterator(self, data: FrameOrSeries, axis: int = 0):
         for key, (i, group) in zip(keys, splitter):
             yield key, group
 
-    def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter":
+    def _get_splitter(
+        self, data: FrameOrSeries, axis: int = 0
+    ) -> "DataSplitter[FrameOrSeries]":
         comp_ids, _, ngroups = self.group_info
         return get_splitter(data, comp_ids, ngroups, axis=axis)
 
@@ -154,7 +155,7 @@ def apply(self, f, data: FrameOrSeries, axis: int = 0):
         group_keys = self._get_group_keys()
         result_values = None
 
-        sdata: FrameOrSeries = splitter._get_sorted_data()
+        sdata = splitter._get_sorted_data()
         if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)):
             # calling splitter.fast_apply will raise TypeError via apply_frame_axis0
             # if we pass EA instead of ndarray
@@ -250,8 +251,8 @@ def groups(self):
         if len(self.groupings) == 1:
             return self.groupings[0].groups
         else:
-            to_groupby = zip(*(ping.grouper for ping in self.groupings))
-            to_groupby = Index(to_groupby)
+            to_groupby_ = zip(*(ping.grouper for ping in self.groupings))
+            to_groupby = Index(to_groupby_)
             return self.axis.groupby(to_groupby)
 
     @cache_readonly
@@ -560,7 +561,7 @@ def _cython_operation(
         if vdim == 1 and arity == 1:
             result = result[:, 0]
 
-        names: Optional[List[str]] = self._name_functions.get(how, None)
+        names = self._name_functions.get(how, None)
 
         if swapped:
             result = result.swapaxes(0, axis)
@@ -787,7 +788,7 @@ def get_iterator(self, data: FrameOrSeries, axis: int = 0):
 
     @cache_readonly
     def indices(self):
-        indices = collections.defaultdict(list)
+        indices: Dict[Any, List] = collections.defaultdict(list)
 
         i = 0
         for label, bin in zip(self.binlabels, self.bins):
@@ -871,7 +872,7 @@ def _is_indexed_like(obj, axes) -> bool:
 # Splitting / application
 
 
-class DataSplitter:
+class DataSplitter(Generic[FrameOrSeries]):
     def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0):
         self.data = data
         self.labels = ensure_int64(labels)
@@ -906,7 +907,7 @@ def __iter__(self):
 
     def _get_sorted_data(self) -> FrameOrSeries:
         return self.data.take(self.sort_idx, axis=self.axis)
 
-    def _chop(self, sdata, slice_obj: slice) -> NDFrame:
+    def _chop(self, sdata: FrameOrSeries, slice_obj: slice) -> FrameOrSeries:
         raise AbstractMethodError(self)
 
@@ -930,9 +931,10 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
 
 def get_splitter(
     data: FrameOrSeries, labels: np.ndarray, ngroups: int, axis: int = 0
-) -> DataSplitter:
+) -> "DataSplitter[FrameOrSeries]":
+    klass: Type[DataSplitter]
     if isinstance(data, Series):
-        klass: Type[DataSplitter] = SeriesSplitter
+        klass = SeriesSplitter
     else:
         # i.e. DataFrame
         klass = FrameSplitter
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index df58593bc930c..863612db9fa2e 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2,7 +2,17 @@
 from datetime import datetime
 import operator
 from textwrap import dedent
-from typing import TYPE_CHECKING, Any, FrozenSet, Hashable, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    FrozenSet,
+    Hashable,
+    Optional,
+    Sequence,
+    TypeVar,
+    Union,
+)
 import warnings
 
 import numpy as np
@@ -88,6 +98,7 @@
 
 if TYPE_CHECKING:
     from pandas import Series
+str_ = str
 
 __all__ = ["Index"]
@@ -141,7 +152,7 @@ def index_arithmetic_method(self, other):
         if isinstance(other, (ABCSeries, ABCDataFrame, ABCTimedeltaIndex)):
             return NotImplemented
 
-        from pandas import Series
+        from pandas import Series  # noqa: F811
 
         result = op(Series(self), other)
 
         if isinstance(result, tuple):
@@ -181,6 +192,9 @@ def _new_Index(cls, d):
     return cls.__new__(cls, **d)
 
 
+_IndexT = TypeVar("_IndexT", bound="Index")
+
+
 class Index(IndexOpsMixin, PandasObject):
     """
     Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -281,6 +295,16 @@ def _outer_indexer(self, left, right):
 
     str = CachedAccessor("str", StringMethods)
 
+    # --------------------------------------------------------------------
+    # Type declarations for Generated Arithmetic, Comparison, and Unary Methods
+
+    __eq__: Callable
+    __ne__: Callable
+    __lt__: Callable
+    __gt__: Callable
+    __le__: Callable
+    __ge__: Callable
+
     # --------------------------------------------------------------------
     # Constructors
@@ -801,7 +825,7 @@ def repeat(self, repeats, axis=None):
     # --------------------------------------------------------------------
     # Copying Methods
 
-    def copy(self, name=None, deep=False, dtype=None, names=None):
+    def copy(self: _IndexT, name=None, deep=False, dtype=None, names=None) -> _IndexT:
         """
         Make a copy of this object.
@@ -828,6 +852,7 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
             ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
         """
         if deep:
+            assert self._data is not None
             new_index = self._shallow_copy(self._data.copy())
         else:
             new_index = self._shallow_copy()
@@ -951,9 +976,9 @@ def _format_with_header(self, header, na_rep="NaN", **kwargs):
             # could have nans
             mask = isna(values)
             if mask.any():
-                result = np.array(result)
-                result[mask] = na_rep
-                result = result.tolist()
+                result_ = np.array(result)
+                result_[mask] = na_rep
+                result = result_.tolist()
 
         else:
             result = _trim_front(format_array(values, None, justify="left"))
@@ -1072,7 +1097,7 @@ def to_series(self, index=None, name=None):
         Series
             The dtype will be based on the type of the Index values.
         """
-        from pandas import Series
+        from pandas import Series  # noqa: F811
 
         if index is None:
             index = self._shallow_copy()
@@ -2356,12 +2381,12 @@ def _get_unique_index(self, dropna: bool = False):
     def __add__(self, other):
         if isinstance(other, (ABCSeries, ABCDataFrame)):
             return NotImplemented
-        from pandas import Series
+        from pandas import Series  # noqa: F811
 
         return Index(Series(self) + other)
 
     def __radd__(self, other):
-        from pandas import Series
+        from pandas import Series  # noqa: F811
 
         return Index(other + Series(self))
 
@@ -2374,7 +2399,7 @@ def __sub__(self, other):
 
     def __rsub__(self, other):
         # wrap Series to ensure we pin name correctly
-        from pandas import Series
+        from pandas import Series  # noqa: F811
 
         return Index(other - Series(self))
 
@@ -4879,7 +4904,9 @@ def _get_string_slice(self, key: str_t, use_lhs: bool = True, use_rhs: bool = Tr
         # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
         raise NotImplementedError
 
-    def slice_indexer(self, start=None, end=None, step=None, kind=None):
+    def slice_indexer(
+        self, start=None, end=None, step=None, kind: Optional[str_] = None
+    ):
         """
         For an ordered or unique index, compute the slice indexer for input
         labels and step.
@@ -5509,7 +5536,9 @@ def ensure_index_from_sequences(sequences, names=None):
     return MultiIndex.from_arrays(sequences, names=names)
 
 
-def ensure_index(index_like, copy: bool = False):
+def ensure_index(
+    index_like: Union[Index, ExtensionArray, "Series", Sequence], copy: bool = False
+) -> Index:
     """
     Ensure that we have an index from some index-like object.
 
@@ -5544,8 +5573,15 @@ def ensure_index(index_like, copy: bool = False):
         if copy:
             index_like = index_like.copy()
         return index_like
+    # https://github.com/python/mypy/issues/1424
+    # error: Item "ExtensionArray" of "Union[ExtensionArray, Series, Sequence[Any]]"
+    # has no attribute "name"  [union-attr]
+    # error: Item "Sequence[Any]" of "Union[ExtensionArray, Series, Sequence[Any]]"
+    # has no attribute "name"  [union-attr]
     if hasattr(index_like, "name"):
-        return Index(index_like, name=index_like.name, copy=copy)
+        return Index(
+            index_like, name=index_like.name, copy=copy  # type: ignore
+        )
 
     if is_iterator(index_like):
         index_like = list(index_like)
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 68d6229e798f5..8e3bf39a0e3b0 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -1,6 +1,6 @@
 from datetime import date, datetime, time, timedelta, tzinfo
 import operator
-from typing import Optional
+from typing import TYPE_CHECKING, Callable, Optional
 import warnings
 
 import numpy as np
@@ -31,6 +31,9 @@
 from pandas.tseries.frequencies import Resolution, to_offset
 from pandas.tseries.offsets import prefix_mapping
 
+if TYPE_CHECKING:
+    from pandas import Int64Index
+
 
 def _new_DatetimeIndex(cls, d):
     """
@@ -204,6 +207,12 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
     `__.
     """
 
+    # Attributes
+    dayofweek: "Int64Index"
+    # Methods
+    tz_convert: Callable
+    tz_localize: Callable
+
     _typ = "datetimeindex"
     _engine_type = libindex.DatetimeEngine
 
@@ -604,7 +613,7 @@ def _maybe_cast_for_get_loc(self, key) -> Timestamp:
             key = key.tz_convert(self.tz)
         return key
 
-    def _maybe_cast_slice_bound(self, label, side: str, kind):
+    def _maybe_cast_slice_bound(self, label, side: str, kind: Optional[str]):
         """
         If label is a string, cast it to datetime according to resolution.
@@ -649,7 +658,9 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True
         loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
         return loc
 
-    def slice_indexer(self, start=None, end=None, step=None, kind=None):
+    def slice_indexer(
+        self, start=None, end=None, step=None, kind: Optional[str] = None
+    ):
         """
         Return indexer for specified label slice.
         Index.slice_indexer, customized to handle time slicing.
@@ -699,7 +710,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
             end_casted = self._maybe_cast_slice_bound(end, "right", kind)
             mask = (self <= end_casted) & mask
 
-        indexer = mask.nonzero()[0][::step]
+        # error: "bool" has no attribute "nonzero"  [attr-defined]
+        indexer = mask.nonzero()[0][::step]  # type: ignore
         if len(indexer) == len(self):
             return slice(None)
         else:
@@ -855,7 +867,7 @@ def date_range(
 
     Returns
     -------
-    rng : DatetimeIndex
+    DatetimeIndex
 
     See Also
     --------
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 18e995ce4efd7..0c0902a436723 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -1,7 +1,7 @@
 """ define the IntervalIndex """
 from operator import le, lt
 import textwrap
-from typing import Any, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Set, Tuple, Type, Union, cast
 
 import numpy as np
 
@@ -58,6 +58,9 @@
 from pandas.tseries.frequencies import to_offset
 from pandas.tseries.offsets import DateOffset
 
+if TYPE_CHECKING:
+    from pandas import CategoricalIndex  # noqa: F401
+
 _VALID_CLOSED = {"left", "right", "both", "neither"}
 _index_doc_kwargs = dict(ibase._index_doc_kwargs)
 
@@ -151,6 +154,23 @@ def func(intvidx_self, other, sort=False):
     return func
 
 
+def _setop(op_name: str, sort=None):
+    @SetopCheck(op_name=op_name)
+    def func(self, other, sort=sort):
+        result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort)
+        result_name = get_op_result_name(self, other)
+
+        # GH 19101: ensure empty results have correct dtype
+        if result.empty:
+            result = result._values.astype(self.dtype.subtype)
+        else:
+            result = result._values
+
+        return type(self).from_tuples(result, closed=self.closed, name=result_name)
+
+    return func
+
+
 @Appender(
     _interval_shared_docs["class"]
     % dict(
@@ -436,7 +456,7 @@ def is_monotonic_decreasing(self) -> bool:
         return self[::-1].is_monotonic_increasing
 
     @cache_readonly
-    def is_unique(self):
+    def is_unique(self) -> bool:
         """
         Return True if the IntervalIndex contains unique elements, else False.
         """
@@ -449,7 +469,7 @@ def is_unique(self):
         if left.is_unique or right.is_unique:
             return True
 
-        seen_pairs = set()
+        seen_pairs: Set[Tuple] = set()
         check_idx = np.where(left.duplicated(keep=False))[0]
         for idx in check_idx:
             pair = (left[idx], right[idx])
@@ -605,7 +625,9 @@ def _maybe_convert_i8(self, key):
             # convert left/right and reconstruct
             left = self._maybe_convert_i8(key.left)
             right = self._maybe_convert_i8(key.right)
-            constructor = Interval if scalar else IntervalIndex.from_arrays
+            constructor: Union[
+                Type[Interval], Callable[..., IntervalIndex]
+            ] = Interval if scalar else IntervalIndex.from_arrays
             return constructor(left, right, closed=self.closed)
 
         if scalar:
@@ -788,6 +810,7 @@ def get_indexer(
             right_indexer = self.right.get_indexer(target_as_index.right)
             indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
         elif is_categorical_dtype(target_as_index.dtype):
+            target_as_index = cast("CategoricalIndex", target_as_index)
             # get an indexer for unique categories then propagate to codes via take_1d
             categories_indexer = self.get_indexer(target_as_index.categories)
             indexer = take_1d(categories_indexer, target_as_index.codes, fill_value=-1)
@@ -1096,22 +1119,6 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex":
 
         return self[mask]
 
-    def _setop(op_name: str, sort=None):
-        @SetopCheck(op_name=op_name)
-        def func(self, other, sort=sort):
-            result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort)
-            result_name = get_op_result_name(self, other)
-
-            # GH 19101: ensure empty results have correct dtype
-            if result.empty:
-                result = result._values.astype(self.dtype.subtype)
-            else:
-                result = result._values
-
-            return type(self).from_tuples(result, closed=self.closed, name=result_name)
-
-        return func
-
     @property
     def is_all_dates(self) -> bool:
         """
@@ -1325,6 +1332,7 @@ def interval_range(
             breaks = maybe_downcast_to_dtype(breaks, "int64")
     else:
         # delegate to the appropriate range function
+        range_func: Callable
        if isinstance(endpoint, Timestamp):
             range_func = date_range
         else:
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 7aa1456846612..07443c83093cd 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -7,6 +7,7 @@
     List,
     Optional,
     Sequence,
+    Set,
     Tuple,
     Union,
 )
@@ -240,7 +241,7 @@ class MultiIndex(Index):
 
     # initialize to zero-length tuples to make everything work
     _typ = "multiindex"
-    _names = FrozenList()
+    _names: List[Optional[Hashable]] = FrozenList()
     _levels = FrozenList()
     _codes = FrozenList()
     _comparables = ["names"]
@@ -497,7 +498,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None):
         if len(tuples) == 0:
             if names is None:
                 raise TypeError("Cannot infer number of levels from empty list")
-            arrays = [[]] * len(names)
+            arrays: List[List] = [[]] * len(names)
         elif isinstance(tuples, (np.ndarray, Index)):
             if isinstance(tuples, Index):
                 tuples = tuples._values
@@ -718,6 +719,7 @@ def _set_levels(
         if level is not None and len(levels) != len(level):
             raise ValueError("Length of levels must match length of level.")
 
+        new_levels: Sequence
         if level is None:
             new_levels = FrozenList(
                 ensure_index(lev, copy=copy)._shallow_copy() for lev in levels
@@ -885,11 +887,13 @@ def _set_codes(
             )
         else:
             level_numbers = [self._get_level_number(lev) for lev in level]
-            new_codes = list(self._codes)
+            _new_codes = list(self._codes)
             for lev_num, level_codes in zip(level_numbers, codes):
                 lev = self.levels[lev_num]
-                new_codes[lev_num] = _coerce_indexer_frozen(level_codes, lev, copy=copy)
-            new_codes = FrozenList(new_codes)
+                _new_codes[lev_num] = _coerce_indexer_frozen(
+                    level_codes, lev, copy=copy
+                )
+            new_codes = FrozenList(_new_codes)
 
         if verify_integrity:
             new_codes = self._verify_integrity(codes=new_codes)
@@ -2407,9 +2411,8 @@ def _get_partial_string_timestamp_match_key(self, key):
         """
         # GH#10331
         if isinstance(key, str) and self.levels[0]._supports_partial_string_indexing:
-            # Convert key '2016-01-01' to
-            # ('2016-01-01'[, slice(None, None, None)]+)
-            key = tuple([key] + [slice(None)] * (len(self.levels) - 1))
+            # Convert key '2016-01-01' to ('2016-01-01'[, slice(None, None, None)]+)
+            key = tuple((key, *([slice(None)] * (len(self.levels) - 1))))
 
         if isinstance(key, tuple):
             # Convert (..., '2016-01-01', ...) in tuple to
@@ -3059,7 +3062,7 @@ def _update_indexer(idxr, indexer=indexer):
             elif is_list_like(k):
                 # a collection of labels to include from this level (these
                 # are or'd)
-                indexers = None
+                indexers: Optional[Int64Index] = None
                 for x in k:
                     try:
                         idxrs = _convert_to_indexer(
@@ -3402,7 +3405,7 @@ def intersection(self, other, sort=False):
         lvals = self._values
         rvals = other._values
 
-        uniq_tuples = None  # flag whether _inner_indexer was successful
+        uniq_tuples: Optional[List] = None  # flag whether _inner_indexer was successful
         if self.is_monotonic and other.is_monotonic:
             try:
                 uniq_tuples = self._inner_indexer(lvals, rvals)[0]
@@ -3412,10 +3415,12 @@ def intersection(self, other, sort=False):
 
         if uniq_tuples is None:
             other_uniq = set(rvals)
-            seen = set()
-            uniq_tuples = [
-                x for x in lvals if x in other_uniq and not (x in seen or seen.add(x))
-            ]
+            seen: Set = set()
+            uniq_tuples = []
+            for x in lvals:
+                if x in other_uniq and x not in seen:
+                    uniq_tuples.append(x)
+                    seen.add(x)
 
         if sort is None:
             uniq_tuples = sorted(uniq_tuples)
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index e2be58a56018d..d0f07d079c229 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -46,6 +46,7 @@ class NumericIndex(Index):
     """
 
     _is_numeric_dtype = True
+    _default_dtype: np.number
 
     def __new__(cls, data=None, dtype=None, copy=False, name=None):
         cls._validate_dtype(dtype)
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 1f565828ec7a5..ad29dd68c5877 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -143,6 +143,27 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
     PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]', freq='Q-DEC')
     """
 
+    # day
+    # dayofweek
+    # dayofyear
+    # days_in_month
+    # daysinmonth
+    # end_time
+    freq: DateOffset
+    # freqstr
+    hour: Any
+    # is_leap_year
+    minute: Any
+    # month
+    # quarter
+    # qyear
+    second: Any
+    # start_time
+    # week
+    # weekday
+    # weekofyear
+    # year
+
     _typ = "periodindex"
     _attributes = ["name", "freq"]
 
@@ -151,7 +172,6 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
     _infer_as_myclass = True
 
     _data: PeriodArray
-    freq: DateOffset
 
     _engine_type = libindex.PeriodEngine
     _supports_partial_string_indexing = True
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 22a44d65a947a..36ea7ce42e87e 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -998,6 +998,38 @@ def _multi_take(self, tup: Tuple):
 
     # -------------------------------------------------------------------
 
+    def _get_partial_string_timestamp_match_key(self, key, labels):
+        """
+        Translate any partial string timestamp matches in key, returning the
+        new key.
+
+        (GH 10331)
+        """
+        if isinstance(labels, ABCMultiIndex):
+            if (
+                isinstance(key, str)
+                and labels.levels[0]._supports_partial_string_indexing
+            ):
+                # Convert key '2016-01-01' to
+                # ('2016-01-01'[, slice(None, None, None)]+)
+                key = tuple((key, *([slice(None)] * (len(labels.levels) - 1))))
+
+            if isinstance(key, tuple):
+                # Convert (..., '2016-01-01', ...) in tuple to
+                # (..., slice('2016-01-01', '2016-01-01', None), ...)
+                new_key = []
+                for i, component in enumerate(key):
+                    if (
+                        isinstance(component, str)
+                        and labels.levels[i]._supports_partial_string_indexing
+                    ):
+                        new_key.append(slice(component, component, None))
+                    else:
+                        new_key.append(component)
+                key = tuple(new_key)
+
+        return key
+
     def _getitem_iterable(self, key, axis: int):
         """
         Index current object with an iterable collection of keys.
@@ -2019,6 +2051,8 @@ class _ScalarAccessIndexer(_NDFrameIndexerBase):
     Access scalars quickly.
     """
 
+    _takeable: bool
+
     def _convert_key(self, key, is_setter: bool = False):
         raise AbstractMethodError(self)
 
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 720e6799a3bf3..07af62a17e581 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -1,6 +1,7 @@
 # TODO: Needs a better name; too many modules are already called "concat"
 from collections import defaultdict
 import copy
+from typing import Dict, List
 
 import numpy as np
 
@@ -89,10 +90,10 @@ def _get_mgr_concatenation_plan(mgr, indexers):
     """
     # Calculate post-reindex shape , save for item axis which will be separate
     # for each block anyway.
-    mgr_shape = list(mgr.shape)
+    mgr_shape_ = list(mgr.shape)
     for ax, indexer in indexers.items():
-        mgr_shape[ax] = len(indexer)
-    mgr_shape = tuple(mgr_shape)
+        mgr_shape_[ax] = len(indexer)
+    mgr_shape = tuple(mgr_shape_)
 
     if 0 in indexers:
         ax0_indexer = indexers.pop(0)
@@ -115,9 +116,9 @@ def _get_mgr_concatenation_plan(mgr, indexers):
 
         join_unit_indexers = indexers.copy()
 
-        shape = list(mgr_shape)
-        shape[0] = len(placements)
-        shape = tuple(shape)
+        shape_ = list(mgr_shape)
+        shape_[0] = len(placements)
+        shape = tuple(shape_)
 
         if blkno == -1:
             unit = JoinUnit(None, shape)
@@ -342,8 +343,8 @@ def _get_empty_dtype_and_na(join_units):
         else:
             dtypes[i] = unit.dtype
 
-    upcast_classes = defaultdict(list)
-    null_upcast_classes = defaultdict(list)
+    upcast_classes: Dict[str, List] = defaultdict(list)
+    null_upcast_classes: Dict[str, List] = defaultdict(list)
     for dtype, unit in zip(dtypes, join_units):
         if dtype is None:
             continue
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index c46aed999f45a..a256413019870 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -1,6 +1,7 @@
 """
 Routines for filling missing data.
 """
+from typing import Set
 
 import numpy as np
 
@@ -238,24 +239,24 @@ def interpolate_1d(
 
     # set preserve_nans based on direction using _interp_limit
     if limit_direction == "forward":
-        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
+        preserve_nans_ = start_nans | set(_interp_limit(invalid, limit, 0))
     elif limit_direction == "backward":
-        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
+        preserve_nans_ = end_nans | set(_interp_limit(invalid, 0, limit))
     else:
         # both directions... just use _interp_limit
-        preserve_nans = set(_interp_limit(invalid, limit, limit))
+        preserve_nans_ = set(_interp_limit(invalid, limit, limit))
 
     # if limit_area is set, add either mid or outside indices
     # to preserve_nans GH #16284
     if limit_area == "inside":
         # preserve NaNs on the outside
-        preserve_nans |= start_nans | end_nans
+        preserve_nans_ |= start_nans | end_nans
     elif limit_area == "outside":
         # preserve NaNs on the inside
-        preserve_nans |= mid_nans
+        preserve_nans_ |= mid_nans
 
     # sort preserve_nans and covert to list
-    preserve_nans = sorted(preserve_nans)
+    preserve_nans = sorted(preserve_nans_)
 
     xvalues = getattr(xvalues, "values", xvalues)
     yvalues = getattr(yvalues, "values", yvalues)
@@ -634,8 +635,8 @@ def _interp_limit(invalid, fw_limit, bw_limit):
     #    1. operate on the reversed array
     #    2. subtract the returned indices from N - 1
     N = len(invalid)
-    f_idx = set()
-    b_idx = set()
+    f_idx: Set = set()
+    b_idx: Set = set()
 
     def inner(invalid, limit):
         limit = min(limit, N)
@@ -659,8 +660,8 @@ def inner(invalid, limit):
             # just use forwards
             return f_idx
     else:
-        b_idx = list(inner(invalid[::-1], bw_limit))
-        b_idx = set(N - 1 - np.asarray(b_idx))
+        b_idx_ = list(inner(invalid[::-1], bw_limit))
+        b_idx = set(N - 1 - np.asarray(b_idx_))
         if fw_limit == 0:
             return b_idx
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 9494248a423a8..d7d7dd134b9c8 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1,7 +1,7 @@
 import functools
 import itertools
 import operator
-from typing import Any, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Tuple, TypeVar, Union, cast
 
 import numpy as np
 
@@ -34,11 +34,18 @@
 
 from pandas.core.construction import extract_array
 
+if TYPE_CHECKING:
+    from pandas.core.arrays import ExtensionArray  # noqa: F401
+
 bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn")
 _BOTTLENECK_INSTALLED = bn is not None
 _USE_BOTTLENECK = False
 
 
+_FuncType = Callable[..., Any]
+_F = TypeVar("_F", bound=_FuncType)
+
+
 def set_use_bottleneck(v: bool = True) -> None:
     # set/unset to use bottleneck
     global _USE_BOTTLENECK
@@ -57,7 +64,7 @@ def __init__(self, *dtypes):
     def check(self, obj) -> bool:
         return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)
 
-    def __call__(self, f):
+    def __call__(self, f: _F) -> _F:
         @functools.wraps(f)
         def _f(*args, **kwargs):
             obj_iter = itertools.chain(args, kwargs.values())
@@ -78,7 +85,7 @@ def _f(*args, **kwargs):
                     raise TypeError(e) from e
                 raise
 
-        return _f
+        return cast(_F, _f)
 
 
 class bottleneck_switch:
@@ -86,7 +93,7 @@ def __init__(self, name=None, **kwargs):
         self.name = name
         self.kwargs = kwargs
 
-    def __call__(self, alt):
+    def __call__(self, alt: _F) -> _F:
         bn_name = self.name or alt.__name__
 
         try:
@@ -130,7 +137,7 @@ def f(
 
             return result
 
-        return f
+        return cast(_F, f)
 
 
 def _bn_ok_dtype(dtype: Dtype, name: str) -> bool:
@@ -520,13 +527,18 @@ def nansum(
 
 
 @bottleneck_switch()
-def nanmean(values, axis=None, skipna=True, mask=None):
+def nanmean(
+    values: Union[np.ndarray, "ExtensionArray"],
+    axis: Optional[int] = None,
+    skipna: bool = True,
+    mask=None,
+) -> float:
     """
     Compute the mean of the element along an axis ignoring NaNs
 
     Parameters
     ----------
-    values : ndarray
+    values : ndarray or ExtensionArray
     axis: int, optional
     skipna : bool, default True
     mask : ndarray[bool], optional
@@ -534,7 +546,7 @@ def nanmean(values, axis=None, skipna=True, mask=None):
 
     Returns
     -------
-    result : float
+    float
         Unless input is a float array, in which case use the same
         precision as the input array.
@@ -558,7 +570,8 @@ def nanmean(values, axis=None, skipna=True, mask=None):
     count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
     the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
 
-    if axis is not None and getattr(the_sum, "ndim", False):
+    if not is_scalar(count):
+        count = cast(np.ndarray, count)
         with np.errstate(all="ignore"):
             # suppress division by zero warnings
             the_mean = the_sum / count
@@ -883,7 +896,7 @@ def nanargmax(
     axis: Optional[int] = None,
     skipna: bool = True,
     mask: Optional[np.ndarray] = None,
-) -> int:
+) -> Union[int, np.ndarray]:
     """
     Parameters
     ----------
@@ -895,8 +908,10 @@ def nanargmax(
 
     Returns
     -------
-    result : int
-        The index of max value in specified axis or -1 in the NA case
+    int or ndarray of ints
+        The index of max value in specified axis or -1 in the NA case. By default,
+        the index is into the flattened array, otherwise along the specified axis.
+        This has the same shape as values.shape with the dimension along axis removed.
 
     Examples
     --------
@@ -1211,7 +1226,7 @@ def _get_counts(
 
     Returns
     -------
-    count : scalar or array
+    scalar or array
     """
     dtype = _get_dtype(dtype)
     if axis is None:
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index 5dd7af454cbd1..4b0ae1705d7e9 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -415,6 +415,8 @@ def fill_bool(x, left=None):
         filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool
 
         res_values = na_logical_op(lvalues, rvalues, op)
+        # https://github.com/python/mypy/issues/5128
+        # error: Cannot call function of unknown type
         res_values = filler(res_values)  # type: ignore
 
         return res_values
diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py
index 7b03b4b449ea5..b9d608eec14f7 100644
--- a/pandas/core/ops/docstrings.py
+++ b/pandas/core/ops/docstrings.py
@@ -1,7 +1,20 @@
 """
 Templating for ops docstrings
 """
-from typing import Dict, Optional
+from typing import TYPE_CHECKING, Dict, Optional
+
+if TYPE_CHECKING:
+    from mypy_extensions import TypedDict
+
+    class _OpDescriptionsBase(TypedDict):
+        op: str
+        desc: str
+
+    class _OpDescriptions(_OpDescriptionsBase, total=False):
+        reverse: Optional[str]
+        series_examples: Optional[str]
+        df_examples: Optional[str]
+        series_returns: Optional[str]
 
 
 def _make_flex_doc(op_name, typ):
@@ -36,7 +49,7 @@ def _make_flex_doc(op_name, typ):
             reverse=op_desc["reverse"],
             series_returns=op_desc["series_returns"],
         )
-        if op_desc["series_examples"]:
+        if op_desc["series_examples"] is not None:
             doc = doc_no_examples + op_desc["series_examples"]
         else:
             doc = doc_no_examples
@@ -267,7 +280,7 @@
 _returns_tuple = """2-Tuple of Series\n    The result of the operation."""
 
-_op_descriptions: Dict[str, Dict[str, Optional[str]]] = {
+_op_descriptions: Dict[str, "_OpDescriptions"] = {
     # Arithmetic Operators
     "add": {
         "op": "+",
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 281586879a158..7096c1f2ff839 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -1,7 +1,7 @@
 import copy
 from datetime import timedelta
 from textwrap import dedent
-from typing import Dict, no_type_check
+from typing import TYPE_CHECKING, Any, Dict, Union, no_type_check
 
 import numpy as np
 
@@ -31,6 +31,9 @@
 from pandas.tseries.frequencies import to_offset
 from pandas.tseries.offsets import DateOffset, Day, Nano, Tick
 
+if TYPE_CHECKING:
+    from pandas import Series  # noqa: F401
+
 _shared_docs_kwargs: Dict[str,
str] = dict() @@ -413,7 +416,7 @@ def _get_resampler_for_grouping(self, groupby, **kwargs): """ return self._resampler_for_grouping(self, groupby=groupby, **kwargs) - def _wrap_result(self, result): + def _wrap_result(self, result: Union[Any, "Series"]) -> Union[Any, "Series"]: """ Potentially wrap any results. """ @@ -860,7 +863,7 @@ def var(self, ddof=1, *args, **kwargs): def size(self): result = self._downsample("size") if not len(self.ax): - from pandas import Series + from pandas import Series # noqa: F811 if self._selected_obj.ndim == 1: name = self._selected_obj.name @@ -1526,14 +1529,15 @@ def _get_time_period_bins(self, ax): return binner, bins, labels - def _get_period_bins(self, ax): + def _get_period_bins(self, ax: PeriodIndex): if not isinstance(ax, PeriodIndex): raise TypeError( "axis must be a PeriodIndex, but got " f"an instance of {type(ax).__name__}" ) - memb = ax.asfreq(self.freq, how=self.convention) + # error: "PeriodIndex" has no attribute "asfreq" + memb = ax.asfreq(self.freq, how=self.convention) # type: ignore # NaT handling as in pandas._lib.lib.generate_bins_dt64() nat_count = 0 @@ -1762,7 +1766,7 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): return fresult, lresult -def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): +def asfreq(obj, freq, method=None, how=None, normalize: bool = False, fill_value=None): """ Utility frequency conversion method for Series/DataFrame. """ @@ -1774,7 +1778,8 @@ def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): how = "E" new_obj = obj.copy() - new_obj.index = obj.index.asfreq(freq, how=how) + # error: "PeriodIndex" has no attribute "asfreq" + new_obj.index = obj.index.asfreq(freq, how=how) # type: ignore elif len(obj.index) == 0: new_obj = obj.copy() diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index c3e170b0e39c4..760fe565204f2 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -1,5 +1,5 @@ import re -from typing import List +from typing import List, TypeVar import numpy as np @@ -17,6 +17,8 @@ from pandas.core.reshape.concat import concat from pandas.core.tools.numeric import to_numeric +_DFT = TypeVar("_DFT", bound="DataFrame") + @Appender( _shared_docs["melt"] @@ -189,8 +191,8 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFr def wide_to_long( - df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+" -) -> DataFrame: + df: _DFT, stubnames, i, j, sep: str = "", suffix: str = r"\d+" +) -> _DFT: r""" Wide panel to long format. Less flexible but more user-friendly than melt. 
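# A minimal sketch (hypothetical names, not part of the patch) of the
# bound-TypeVar pattern wide_to_long now uses: binding _DFT to DataFrame
# lets mypy carry a DataFrame subclass through the call, so a subclass in
# gives the same subclass out.
from typing import TypeVar

from pandas import DataFrame

_DFT = TypeVar("_DFT", bound=DataFrame)


def passthrough(df: _DFT) -> _DFT:
    # any operation returning the same kind of frame preserves the type
    return df


class MyFrame(DataFrame):
    pass


frame = passthrough(MyFrame({"a": [1]}))  # mypy infers MyFrame, not DataFrame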
@@ -419,7 +421,7 @@ def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]: pattern = re.compile(regex) return [col for col in df.columns if pattern.match(col)] - def melt_stub(df, stub: str, i, j, value_vars, sep: str): + def melt_stub(df: _DFT, stub: str, i, j, value_vars, sep: str) -> _DFT: newdf = melt( df, id_vars=i, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 607a1b75dcfcd..77b27eeeefd8d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -6,7 +6,7 @@ import datetime from functools import partial import string -from typing import TYPE_CHECKING, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union import warnings import numpy as np @@ -966,7 +966,7 @@ def _get_merge_keys(self): """ left_keys = [] right_keys = [] - join_names = [] + join_names: List = [] right_drop = [] left_drop = [] diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 17473ac26dfd6..9ed42f7136cb7 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,7 +1,8 @@ -from typing import TYPE_CHECKING, Callable, Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Sequence, Tuple, Union import numpy as np +from pandas._typing import Axis from pandas.util._decorators import Appender, Substitution from pandas.core.dtypes.cast import maybe_downcast_to_dtype @@ -467,13 +468,13 @@ def crosstab( index, columns, values=None, - rownames=None, - colnames=None, - aggfunc=None, - margins=False, + rownames: Optional[Sequence] = None, + colnames: Optional[Sequence] = None, + aggfunc: Optional[Callable] = None, + margins: bool = False, margins_name: str = "All", dropna: bool = True, - normalize=False, + normalize: Union[bool, Axis] = False, ) -> "DataFrame": """ Compute a simple cross tabulation of two (or more) factors. By default @@ -624,9 +625,10 @@ def crosstab( return table -def _normalize(table, normalize, margins: bool, margins_name="All"): +def _normalize(table, normalize: Union[bool, Axis], margins: bool, margins_name="All"): if not isinstance(normalize, (bool, str)): + # TODO: can NDFrame._get_axis_name be used here instead? 
axis_subs = {0: "index", 1: "columns"} try: normalize = axis_subs[normalize] @@ -705,7 +707,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"): return table -def _get_names(arrs, names, prefix: str = "row"): +def _get_names(arrs, names, prefix: str = "row") -> List: if names is None: names = [] for i, arr in enumerate(arrs): diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 66c2f5c9b927f..75fb1c759a5be 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -1,6 +1,8 @@ """ Quantilization functions and related stuff """ +from typing import Callable + import numpy as np from pandas._libs import Timedelta, Timestamp @@ -517,6 +519,7 @@ def _format_labels( """ based on the dtype, return our labels """ closed = "right" if right else "left" + formatter: Callable if is_datetime64tz_dtype(dtype): formatter = lambda x: Timestamp(x, tz=dtype.tz) adjust = lambda x: x - Timedelta("1ns") diff --git a/pandas/core/series.py b/pandas/core/series.py index 66caa4623f9ad..6ba1c0d37b68f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -14,6 +14,7 @@ Optional, Tuple, Type, + Union, ) import warnings @@ -1354,7 +1355,7 @@ def to_string( float_format=None, header=True, index=True, - length=False, + length: Union[bool, str] = False, dtype=False, name=False, max_rows=None, diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5496eca46b992..3f728e9263a05 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -242,7 +242,7 @@ def lexsort_indexer(keys, orders=None, na_position: str = "last"): def nargsort( items, kind: str = "quicksort", ascending: bool = True, na_position: str = "last" -): +) -> np.ndarray: """ Intended to be a drop-in replacement for np.argsort which handles NaNs. diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 52d9a81489db4..230e45650e61e 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2,7 +2,19 @@ from functools import wraps import re import textwrap -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Pattern, Type, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Optional, + Pattern, + Set, + Tuple, + Type, + Union, +) import warnings import numpy as np @@ -41,6 +53,9 @@ if TYPE_CHECKING: from pandas.arrays import StringArray +if TYPE_CHECKING: + from pandas import Series # noqa: F401 + _cpython_optimized_encoders = ( "utf-8", "utf8", @@ -1174,7 +1189,7 @@ def str_extractall(arr, pat, flags=0): return result -def str_get_dummies(arr, sep="|"): +def str_get_dummies(arr, sep: str = "|") -> Tuple: """ Split each string in the Series by sep and return a DataFrame of dummy/indicator variables. @@ -1214,10 +1229,10 @@ def str_get_dummies(arr, sep="|"): except TypeError: arr = sep + arr.astype(str) + sep - tags = set() + tags_: Set[str] = set() for ts in arr.str.split(sep): - tags.update(ts) - tags = sorted(tags - {""}) + tags_.update(ts) + tags = sorted(tags_ - {""}) dummies = np.empty((len(arr), len(tags)), dtype=np.int64) @@ -2180,7 +2195,7 @@ def _wrap_result( returns_string=True, ): - from pandas import Index, Series, MultiIndex + from pandas import Index, Series, MultiIndex # noqa: F811 # for category, we do the stuff on the categories, so blow it up # to the full series again @@ -2201,6 +2216,7 @@ def _wrap_result( # case we'll want to return the same dtype as the input. # Or we can be wrapping a numeric output, in which case we don't want # to return a StringArray. 
+ dtype: Optional[str] if self._is_string and returns_string: dtype = "string" else: @@ -2270,7 +2286,7 @@ def cons_row(x): result = cons(result, name=name, index=index, dtype=dtype) return result - def _get_series_list(self, others): + def _get_series_list(self, others) -> List["Series"]: """ Auxiliary function for :meth:`str.cat`. Turn potentially mixed input into a list of Series (elements without an index must match the length @@ -2286,7 +2302,7 @@ def _get_series_list(self, others): list of Series Others transformed into list of Series. """ - from pandas import Series, DataFrame + from pandas import Series, DataFrame # noqa: F811 # self._orig is either Series or Index idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index @@ -2313,7 +2329,7 @@ def _get_series_list(self, others): or (isinstance(x, np.ndarray) and x.ndim == 1) for x in others ): - los = [] + los: List[Series] = [] while others: # iterate through list and append each element los = los + self._get_series_list(others.pop(0)) return los @@ -2462,7 +2478,7 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): For more examples, see :ref:`here `. """ - from pandas import Index, Series, concat + from pandas import Index, Series, concat # noqa: F811 if isinstance(others, str): raise ValueError("Did you mean to supply a `sep` keyword?") diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index d9c8611c94cdb..77afde5b52ab3 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -2,7 +2,7 @@ data hash pandas / numpy objects """ import itertools -from typing import Optional +from typing import Iterator, Optional import numpy as np @@ -116,7 +116,7 @@ def hash_pandas_object( h = Series(h, index=obj.index, dtype="uint64", copy=False) elif isinstance(obj, ABCDataFrame): - hashes = (hash_array(series.values) for _, series in obj.items()) + hashes: Iterator = (hash_array(series.values) for _, series in obj.items()) num_items = len(obj.columns) if index: index_hash_generator = ( @@ -126,7 +126,7 @@ def hash_pandas_object( encoding=encoding, hash_key=hash_key, categorize=categorize, - ).values # noqa + ).values for _ in [None] ) num_items += 1 diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 05f19de19f9f7..f91bb655227e8 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -48,6 +48,8 @@ class WindowGroupByMixin(GroupByMixin): Provide the groupby facilities. 
""" + _shallow_copy: Callable + def __init__(self, obj, *args, **kwargs): kwargs.pop("parent", None) groupby = kwargs.pop("groupby", None) @@ -56,7 +58,9 @@ def __init__(self, obj, *args, **kwargs): self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True - super().__init__(obj, *args, **kwargs) + # https://github.com/python/mypy/issues/5887 + # error: Too many arguments for "__init__" of "object" + super().__init__(obj, *args, **kwargs) # type: ignore count = _dispatch("count") corr = _dispatch("corr", other=None, pairwise=None) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 2759280dc1d1c..94259d6bf05c6 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -1,4 +1,5 @@ from textwrap import dedent +from typing import Any, List import numpy as np @@ -212,7 +213,7 @@ def _apply(self, func, **kwargs): block_list = list(blocks) results = [] - exclude = [] + exclude: List[Any] = [] for i, b in enumerate(blocks): try: values = self._prep_values(b.values) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 146c139806bca..f0176925f06de 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -129,8 +129,8 @@ def aggregate(self, func, *args, **kwargs): @Substitution(name="expanding") @Appender(_shared_docs["count"]) - def count(self, **kwargs): - return super().count(**kwargs) + def count(self): + return super().count() @Substitution(name="expanding") @Appender(_shared_docs["apply"]) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3fdf81c4bb570..c78bb5dbd1ac4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -74,7 +74,7 @@ def __init__( obj, window=None, min_periods: Optional[int] = None, - center: Optional[bool] = False, + center: bool = False, win_type: Optional[str] = None, axis: Axis = 0, on: Optional[Union[str, Index]] = None, diff --git a/pandas/io/common.py b/pandas/io/common.py index 0fce8f5382686..51be57879dcba 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -128,6 +128,7 @@ def stringify_path( """ if hasattr(filepath_or_buffer, "__fspath__"): # https://github.com/python/mypy/issues/1424 + # error: Item "str" of "Union[str, Path, IO[str]]" has no attribute "__fspath__" return filepath_or_buffer.__fspath__() # type: ignore elif isinstance(filepath_or_buffer, pathlib.Path): return str(filepath_or_buffer) @@ -474,6 +475,18 @@ def get_handle( return f, handles +# error: Definition of "__enter__" in base class "ZipFile" is incompatible with +# definition in base class "BytesIO" [misc] +# error: Definition of "__enter__" in base class "ZipFile" is incompatible with +# definition in base class "BinaryIO" [misc] +# error: Definition of "__enter__" in base class "ZipFile" is incompatible with +# definition in base class "IO" [misc] +# error: Definition of "read" in base class "ZipFile" is incompatible with +# definition in base class "BytesIO" [misc] +# error: Definition of "read" in base class "ZipFile" is incompatible with +# definition in base class "IO" [misc] +# error: Definition of "__exit__" in base class "ZipFile" is incompatible with +# definition in base class "BytesIO" [misc] class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore """ Wrapper for standard library class ZipFile and allow the returned file-like @@ -497,7 +510,11 @@ def __init__( super().__init__(file, mode, zipfile.ZIP_DEFLATED, **kwargs) def write(self, data): + # TODO: fixme, 
self.filename could be None + # error: Argument 1 to "writestr" of "ZipFile" has incompatible type + # "Optional[str]"; expected "Union[str, ZipInfo]" [arg-type] archive_name = self.filename + assert archive_name is not None if self.archive_name is not None: archive_name = self.archive_name super().writestr(archive_name, data) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index d1139f640cef4..f7e15450e20a3 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -3,6 +3,7 @@ from io import BytesIO import os from textwrap import fill +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type, Union from pandas._config import config @@ -30,6 +31,9 @@ ) from pandas.io.parsers import TextParser +if TYPE_CHECKING: + from pandas import Series # noqa: F401 + _read_excel_doc = ( """ Read an Excel file into a pandas DataFrame. @@ -428,7 +432,7 @@ def parse( # handle same-type duplicates. sheets = list(dict.fromkeys(sheets).keys()) - output = {} + output: Dict[Union[str, int], Union["Series", DataFrame]] = {} for asheetname in sheets: if verbose: @@ -643,7 +647,7 @@ def __new__(cls, path, engine=None, **kwargs): @property @abc.abstractmethod - def supported_extensions(self): + def supported_extensions(self) -> Tuple[str, ...]: """Extensions that writer engine supports.""" pass @@ -655,7 +659,12 @@ def engine(self): @abc.abstractmethod def write_cells( - self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + self, + cells, + sheet_name: Optional[str] = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: Optional[Tuple[int, int]] = None, ): """ Write given formatted cells into an Excel sheet @@ -698,7 +707,7 @@ def __init__( self.check_extension(ext) self.path = path - self.sheets = {} + self.sheets: Dict[str, Any] = {} self.cur_sheet = None if date_format is None: @@ -713,9 +722,10 @@ def __init__( self.mode = mode def __fspath__(self): + assert self.path is not None return stringify_path(self.path) - def _get_sheet_name(self, sheet_name): + def _get_sheet_name(self, sheet_name: Optional[str]): if sheet_name is None: sheet_name = self.cur_sheet if sheet_name is None: # pragma: no cover @@ -802,7 +812,9 @@ class ExcelFile: from pandas.io.excel._xlrd import _XlrdReader from pandas.io.excel._pyxlsb import _PyxlsbReader - _engines = { + _engines: Dict[ + str, Type[Union[_XlrdReader, _OpenpyxlReader, _ODFReader, _PyxlsbReader]] + ] = { "xlrd": _XlrdReader, "openpyxl": _OpenpyxlReader, "odf": _ODFReader, diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index 7c8e1abb497bc..9c31bf408da45 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -1,3 +1,5 @@ +from typing import List, Optional, Tuple + from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import is_integer, is_list_like @@ -83,7 +85,7 @@ def _excel2num(x): return index - 1 -def _range2cols(areas): +def _range2cols(areas: str) -> List[int]: """ Convert comma separated list of column names and ranges to indices.
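# For context, a toy re-implementation (hypothetical, simplified from the
# _excel2num shown above) of the bijective base-26 mapping from Excel column
# letters to 0-based indices that _range2cols builds its ranges on.
def excel2num(col: str) -> int:
    index = 0
    for ch in col.upper().strip():
        index = index * 26 + ord(ch) - ord("A") + 1  # A->1, ..., Z->26
    return index - 1  # shift to 0-based


assert excel2num("A") == 0
assert excel2num("Z") == 25
assert excel2num("AB") == 27  # so 'Z:AB' expands to [25, 26, 27]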
@@ -104,12 +106,12 @@ def _range2cols(areas): >>> _range2cols('A,C,Z:AB') [0, 2, 25, 26, 27] """ - cols = [] + cols: List[int] = [] for rng in areas.split(","): if ":" in rng: - rng = rng.split(":") - cols.extend(range(_excel2num(rng[0]), _excel2num(rng[1]) + 1)) + rng_ = rng.split(":") + cols.extend(range(_excel2num(rng_[0]), _excel2num(rng_[1]) + 1)) else: cols.append(_excel2num(rng)) @@ -145,7 +147,7 @@ def _maybe_convert_usecols(usecols): return usecols -def _validate_freeze_panes(freeze_panes): +def _validate_freeze_panes(freeze_panes: Optional[Tuple[int, int]]) -> bool: if freeze_panes is not None: if len(freeze_panes) == 2 and all( isinstance(item, int) for item in freeze_panes diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 85a1bb031f457..6156c27e16e62 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -1,3 +1,5 @@ +from typing import Dict, List + import pandas._libs.json as json from pandas.io.excel._base import ExcelWriter @@ -8,7 +10,7 @@ class _XlsxStyler: # Map from openpyxl-oriented styles to flatter xlsxwriter representation # Ordering necessary for both determinism and because some are keyed by # prefixes of others. - STYLE_MAPPING = { + STYLE_MAPPING: Dict[str, List] = { "font": [ (("name",), "font_name"), (("sz",), "font_size"), @@ -190,11 +192,13 @@ def save(self): """ Save workbook to disk. """ + assert self.book is not None return self.book.close() def write_cells( self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None ): + assert self.book is not None # Write the frame cells using xlsxwriter. sheet_name = self._get_sheet_name(sheet_name) diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index 78efe77e9fe2d..090a52c3026bc 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -1,3 +1,5 @@ +from typing import Dict, Optional, Tuple + import pandas._libs.json as json from pandas.io.excel._base import ExcelWriter @@ -32,7 +34,12 @@ def save(self): return self.book.save(self.path) def write_cells( - self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + self, + cells, + sheet_name: Optional[str] = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: Optional[Tuple[int, int]] = None, ): # Write the frame cells using xlwt. 
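# The "assert self.book is not None" lines added to these writers are for
# mypy's benefit: an assert narrows Optional[T] to T for the rest of the
# scope. A minimal sketch with a hypothetical Workbook stand-in:
from typing import Optional


class Workbook:
    def close(self) -> None:
        pass


def save(book: Optional[Workbook]) -> None:
    assert book is not None  # narrows Optional[Workbook] to Workbook
    book.close()  # no "Item 'None' has no attribute 'close'" error


save(Workbook())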
@@ -41,15 +48,18 @@ def write_cells( if sheet_name in self.sheets: wks = self.sheets[sheet_name] else: + assert self.book is not None wks = self.book.add_sheet(sheet_name) self.sheets[sheet_name] = wks if _validate_freeze_panes(freeze_panes): + assert freeze_panes is not None wks.set_panes_frozen(True) - wks.set_horz_split_pos(freeze_panes[0]) - wks.set_vert_split_pos(freeze_panes[1]) + row, column = freeze_panes + wks.set_horz_split_pos(row) + wks.set_vert_split_pos(column) - style_dict = {} + style_dict: Dict = {} for cell in cells: val, fmt = self._value_with_fmt(cell.val) diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index bed29e1fd4792..fe6f5d12e4e1c 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -69,7 +69,8 @@ def check_main(): return not hasattr(main, "__file__") or get_option("mode.sim_interactive") try: - return __IPYTHON__ or check_main() # noqa + # error: Name '__IPYTHON__' is not defined + return __IPYTHON__ or check_main() # type: ignore # noqa except NameError: return check_main() @@ -83,7 +84,8 @@ def in_ipython_frontend(): bool """ try: - ip = get_ipython() # noqa + # error: Name 'get_ipython' is not defined + ip = get_ipython() # type: ignore # noqa return "zmq" in str(type(ip)).lower() except NameError: pass diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index b40d2a57b8106..27e4398cf5b73 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -3,6 +3,7 @@ """ import re +from typing import Optional import warnings @@ -73,6 +74,8 @@ def __call__(self, declarations_str, inherited=None): ('font-size', '24pt'), ('font-weight', 'bold')] """ + font_size: Optional[float] + props = dict(self.atomize(self.parse(declarations_str))) if inherited is None: inherited = {} @@ -179,7 +182,10 @@ def _error(): return self.size_to_pt("1!!default", conversions=conversions) try: - val, unit = re.match(r"^(\S*?)([a-zA-Z%!].*)", in_val).groups() + # error: Item "None" of "Optional[Match[Any]]" has no attribute "groups" + val, unit = re.match( # type: ignore + r"^(\S*?)([a-zA-Z%!].*)", in_val + ).groups() except AttributeError: return _error() if val == "": diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index aac1df5dcd396..978852d2a7f66 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -5,7 +5,7 @@ from functools import reduce import itertools import re -from typing import Callable, Dict, Optional, Sequence, Union +from typing import Callable, Dict, Iterable, Optional, Sequence, Union import warnings import numpy as np @@ -531,7 +531,7 @@ def _format_header(self): else: gen = self._format_header_regular() - gen2 = () + gen2: Iterable[ExcelCell] = () if self.df.index.names: row = [x if x is not None else "" for x in self.df.index.names] + [ "" @@ -727,7 +727,11 @@ def write( if isinstance(writer, ExcelWriter): need_save = False else: - writer = ExcelWriter(stringify_path(writer), engine=engine) + # TODO: check ExcelWriter implementation for PEP 3119 compliance + # error: Cannot instantiate abstract class 'ExcelWriter' with abstract + # attributes 'engine', 'save', 'supported_extensions' and 'write_cells' + # [abstract] + writer = ExcelWriter(stringify_path(writer), engine=engine) # type: ignore need_save = True formatted_cells = self.get_formatted_cells() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 59542a8da535e..acc580a1e48d4 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1199,11 +1199,10 @@ 
def _format_strings(self) -> List[str]: else: float_format = self.float_format - formatter = ( - self.formatter - if self.formatter is not None - else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n"))) - ) + if self.formatter is None: + formatter = lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n")) + else: + formatter = self.formatter def _format(x): if self.na_rep is not None and is_scalar(x) and isna(x): @@ -1287,7 +1286,15 @@ def _value_formatter( if float_format: def base_formatter(v): - return float_format(value=v) if notna(v) else self.na_rep + # error: "str" not callable + # error: Unexpected keyword argument "value" for "__call__" of + # "EngFormatter" + # error: "None" not callable + return ( + float_format(value=v) # type: ignore + if notna(v) + else self.na_rep + ) else: @@ -1716,7 +1723,9 @@ def _make_fixed_width( def just(x): if conf_max is not None: - if (conf_max > 3) & (adj.len(x) > max_len): + # https://github.com/python/mypy/issues/2608 + # error: Item "None" of "Optional[TextAdjustment]" has no attribute "len" + if (conf_max > 3) & (adj.len(x) > max_len): # type: ignore x = x[: max_len - 3] + "..." return x diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 1be0f977f9b20..efa899e0ef457 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -85,6 +85,7 @@ def _get_columns_formatted_values(self) -> Iterable: return self.columns # https://github.com/python/mypy/issues/1237 + # error: Signature of "is_truncated" incompatible with supertype "TableFormatter" @property def is_truncated(self) -> bool: # type: ignore return self.fmt.is_truncated diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 36e774305b577..6b16a44ff4dea 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -4,6 +4,7 @@ import sys from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -21,6 +22,9 @@ from pandas.core.dtypes.inference import is_sequence +if TYPE_CHECKING: + from pandas import MultiIndex # noqa: F401 + EscapeChars = Union[Mapping[str, str], Iterable[str]] _KT = TypeVar("_KT") _VT = TypeVar("_VT") @@ -495,11 +499,11 @@ def _justify( # error: Incompatible return value type (got "Tuple[List[Sequence[str]], # List[Sequence[str]]]", expected "Tuple[List[Tuple[str, ...]], # List[Tuple[str, ...]]]") - return head, tail # type: ignore + return head, tail # type: ignore # noqa: F723 def format_object_attrs( - obj: Sequence, include_dtype: bool = True + obj: Union[Sequence, "MultiIndex"], include_dtype: bool = True ) -> List[Tuple[str, Union[str, int]]]: """ Return a list of tuples of the (attr, formatted_value) @@ -525,7 +529,9 @@ def format_object_attrs( # error: "Sequence[Any]" has no attribute "name" attrs.append(("name", default_pprint(obj.name))) # type: ignore # error: "Sequence[Any]" has no attribute "names" - elif getattr(obj, "names", None) is not None and any(obj.names): # type: ignore + elif getattr(obj, "names", None) is not None and any( + obj.names # type: ignore + ): # error: "Sequence[Any]" has no attribute "names" attrs.append(("names", default_pprint(obj.names))) # type: ignore max_seq_items = get_option("display.max_seq_items") or len(obj) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index fecdf3b758f0f..8b0b79ffcb6cb 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -36,7 +36,7 @@ import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame 
-from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice +from pandas.core.indexing import _IndexSlice, _maybe_numeric_slice, _non_reducing_slice jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") @@ -59,6 +59,11 @@ def _mpl(func: Callable): raise ImportError(no_mpl_message.format(func.__name__)) +_ApplyArgs = Tuple[ + Callable[[FrameOrSeries], FrameOrSeries], Axis, Optional[_IndexSlice] +] + + class Styler: """ Helps style a DataFrame or Series according to the data with HTML and CSS. @@ -146,7 +151,7 @@ def __init__( na_rep: Optional[str] = None, ): self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list) - self._todo: List[Tuple[Callable, Tuple, Dict]] = [] + self._todo: List[Tuple[Callable, _ApplyArgs, Dict]] = [] if not isinstance(data, (pd.Series, pd.DataFrame)): raise TypeError("``data`` must be a Series or DataFrame") @@ -167,7 +172,7 @@ def __init__( self.precision = precision self.table_attributes = table_attributes self.hidden_index = False - self.hidden_columns: Sequence[int] = [] + self.hidden_columns: List[int] = [] self.cell_ids = cell_ids self.na_rep = na_rep @@ -262,7 +267,7 @@ def format_attr(pair): idx_lengths = _get_level_lengths(self.index) col_lengths = _get_level_lengths(self.columns, hidden_columns) - cell_context = dict() + cell_context: Dict[str, Dict] = dict() n_rlvls = self.data.index.nlevels n_clvls = self.data.columns.nlevels @@ -1410,7 +1415,7 @@ def _highlight_extrema( ) @classmethod - def from_custom_template(cls, searchpath, name): + def from_custom_template(cls, searchpath, name: str): """ Factory function for creating a subclass of ``Styler``. @@ -1430,7 +1435,10 @@ def from_custom_template(cls, searchpath, name): """ loader = jinja2.ChoiceLoader([jinja2.FileSystemLoader(searchpath), cls.loader]) - class MyStyler(cls): + # https://github.com/python/mypy/issues/2477 + # error: Variable "cls" is not valid as a type [valid-type] + # error: Invalid base class "cls" + class MyStyler(cls): # type: ignore env = jinja2.Environment(loader=loader) template = env.get_template(name) diff --git a/pandas/io/html.py b/pandas/io/html.py index c4ffe332e3020..c13e0b18c152b 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -8,6 +8,7 @@ import numbers import os import re +from typing import Iterable, List, Set, Tuple, TypeVar from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError, EmptyDataError @@ -26,6 +27,8 @@ _HAS_LXML = False _HAS_HTML5LIB = False +_T = TypeVar("_T") + def _importers(): # import things we need @@ -436,7 +439,7 @@ def _expand_colspan_rowspan(self, rows): to subsequent cells. 
""" all_texts = [] # list of rows, each a list of str - remainder = [] # list of (index, text, nrows) + remainder: List[Tuple[int, str, int]] = [] # list of (index, text, nrows) for tr in rows: texts = [] # the output for this row @@ -488,7 +491,9 @@ def _expand_colspan_rowspan(self, rows): return all_texts - def _handle_hidden_tables(self, tbl_list, attr_name): + def _handle_hidden_tables( + self, tbl_list: Iterable[_T], attr_name: str + ) -> Iterable[_T]: """ Return list of tables, potentially removing hidden elements @@ -544,7 +549,7 @@ def _parse_tables(self, doc, match, attrs): raise ValueError("No tables found") result = [] - unique_tables = set() + unique_tables: Set[Iterable] = set() tables = self._handle_hidden_tables(tables, "attrs") for table in tables: @@ -737,7 +742,7 @@ def _build_doc(self): return r def _parse_thead_tr(self, table): - rows = [] + rows: List = [] for thead in table.xpath(".//thead"): rows.extend(thead.xpath("./tr")) @@ -911,7 +916,9 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): else: break else: - raise retained + # https://github.com/pandas-dev/pandas/commit/a38a004629f8a2d4da9392133e3e1162261b1e3f#r35680919 + # error: Exception must be derived from BaseException [misc] + raise retained # type: ignore ret = [] for table in tables: diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 20724a498b397..c01b017754ad3 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -115,6 +115,7 @@ def __init__( self.obj = obj if orient is None: + # error: "Writer" has no attribute "_default_orient" orient = self._default_orient # type: ignore self.orient = orient diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 6061af72901a5..01aa60dc2c2cd 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -3,10 +3,11 @@ https://specs.frictionlessdata.io/json-table-schema/ """ +from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast import warnings import pandas._libs.json as json -from pandas._typing import DtypeObj +from pandas._typing import DtypeObj, JSONSerializable from pandas.core.dtypes.common import ( is_bool_dtype, @@ -20,10 +21,15 @@ is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.generic import ABCMultiIndex from pandas import DataFrame import pandas.core.common as com +if TYPE_CHECKING: + from pandas import Series # noqa: F401 + from pandas.core.indexes.multi import MultiIndex # noqa: F401 + loads = json.loads @@ -103,7 +109,10 @@ def convert_pandas_type_to_json_field(arr): name = "values" else: name = arr.name - field = {"name": name, "type": as_json_table_type(dtype)} + field: Dict[str, JSONSerializable] = { + "name": name, + "type": as_json_table_type(dtype), + } if is_categorical_dtype(arr): if hasattr(arr, "categories"): @@ -188,7 +197,12 @@ def convert_json_field_to_pandas_type(field): raise ValueError(f"Unsupported or invalid field type: {typ}") -def build_table_schema(data, index=True, primary_key=None, version=True): +def build_table_schema( + data: Union["Series", DataFrame], + index: bool = True, + primary_key: Optional[bool] = None, + version: bool = True, +) -> Dict[str, Any]: """ Create a Table schema from ``data``. 
@@ -237,12 +251,13 @@ def build_table_schema(data, index=True, primary_key=None, version=True): if index is True: data = set_default_names(data) - schema = {} + schema: Dict[str, Any] = {} fields = [] if index: - if data.index.nlevels > 1: - for level, name in zip(data.index.levels, data.index.names): + if isinstance(data.index, ABCMultiIndex): + _index = cast("MultiIndex", data.index) + for level, name in zip(_index.levels, _index.names): new_field = convert_pandas_type_to_json_field(level) new_field["name"] = name fields.append(new_field) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a22251b29da54..3dd87ae6ed758 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1916,9 +1916,7 @@ def is_indexed(self) -> bool: if not hasattr(self.table, "cols"): # e.g. if infer hasn't been called yet, self.table will be None. return False - # GH#29692 mypy doesn't recognize self.table as having a "cols" attribute - # 'error: "None" has no attribute "cols"' - return getattr(self.table.cols, self.cname).is_indexed # type: ignore + return getattr(self.table.cols, self.cname).is_indexed def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): """ diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index d47dd2c71b86f..e3821856191e3 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -16,6 +16,7 @@ from collections import abc from datetime import datetime import struct +from typing import Any, List import numpy as np @@ -29,11 +30,19 @@ class _subheader_pointer: - pass + offset: Any + length: Any + compression: Any + ptype: Any class _column: - pass + col_id: Any + name: Any + label: Any + format: Any + ctype: Any + length: Any # SAS7BDAT represents a SAS data file in SAS7BDAT format. 
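# The attribute-only bodies given to _subheader_pointer and _column above use
# bare annotations: they declare instance attributes for the type checker but
# create nothing at runtime. A minimal sketch (Pointer is a hypothetical name):
from typing import Any


class Pointer:
    offset: Any
    length: Any


p = Pointer()
p.offset = 16  # attribute assignment now type-checks
assert not hasattr(Pointer, "offset")  # bare annotation creates no class value
assert "offset" in Pointer.__annotations__  # it only records the declaration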
@@ -87,17 +96,17 @@ def __init__( self.convert_header_text = convert_header_text self.default_encoding = "latin-1" - self.compression = "" - self.column_names_strings = [] - self.column_names = [] - self.column_formats = [] - self.columns = [] + self.compression = b"" + self.column_names_strings: List = [] + self.column_names: List = [] + self.column_formats: List = [] + self.columns: List = [] - self._current_page_data_subheader_pointers = [] + self._current_page_data_subheader_pointers: List = [] self._cached_page = None - self._column_data_lengths = [] - self._column_data_offsets = [] - self._column_types = [] + self._column_data_lengths: List = [] + self._column_data_offsets: List = [] + self._column_types: List = [] self._current_row_in_file_index = 0 self._current_row_on_page_index = 0 @@ -362,7 +371,7 @@ def _get_subheader_index(self, signature, compression, ptype): if index is None: f1 = (compression == const.compressed_subheader_id) or (compression == 0) f2 = ptype == const.compressed_subheader_type - if (self.compression != "") and f1 and f2: + if (self.compression != b"") and f1 and f2: index = const.SASIndex.data_subheader_index else: self.close() @@ -481,7 +490,7 @@ def _process_columntext_subheader(self, offset, length): self.column_names_strings.append(cname) if len(self.column_names_strings) == 1: - compression_literal = "" + compression_literal = b"" for cl in const.compression_literals: if cl in cname_raw: compression_literal = cl @@ -494,7 +503,7 @@ def _process_columntext_subheader(self, offset, length): buf = self._read_bytes(offset1, self._lcp) compression_literal = buf.rstrip(b"\x00") - if compression_literal == "": + if compression_literal == b"": self._lcs = 0 offset1 = offset + 32 if self.U64: diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 27d56d4ede403..4c8658ff224d1 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -1,16 +1,24 @@ """ Read SAS sas7bdat or xport files. """ +from typing import TYPE_CHECKING, AnyStr, Optional, Union + +from pandas._typing import FilePathOrBuffer + from pandas.io.common import stringify_path +if TYPE_CHECKING: + from pandas.io.sas.sas_xport import XportReader # noqa: F401 + from pandas.io.sas.sas7bdat import SAS7BDATReader # noqa: F401 + def read_sas( - filepath_or_buffer, - format=None, + filepath_or_buffer: FilePathOrBuffer[AnyStr], + format: Optional[str] = None, index=None, - encoding=None, - chunksize=None, - iterator=False, + encoding: Optional[str] = None, + chunksize: Optional[int] = None, + iterator: bool = False, ): """ Read SAS files stored as either XPORT or SAS7BDAT format files. 
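# The TYPE_CHECKING block above makes XportReader/SAS7BDATReader visible to
# the type checker only, while the runtime imports stay lazy inside read_sas.
# A minimal sketch of the idiom (Decimal is just an arbitrary example):
from typing import TYPE_CHECKING, Union

if TYPE_CHECKING:
    from decimal import Decimal  # noqa: F401  # annotation-only import


def to_float(x: Union[float, "Decimal"]) -> float:
    # the quoted "Decimal" keeps the name out of the runtime namespace
    return float(x)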
@@ -62,14 +70,15 @@ def read_sas( else: raise ValueError("unable to infer format of SAS file") + reader: Union["XportReader", "SAS7BDATReader"] if format.lower() == "xport": - from pandas.io.sas.sas_xport import XportReader + from pandas.io.sas.sas_xport import XportReader # noqa: F811 reader = XportReader( filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize ) elif format.lower() == "sas7bdat": - from pandas.io.sas.sas7bdat import SAS7BDATReader + from pandas.io.sas.sas7bdat import SAS7BDATReader # noqa: F811 reader = SAS7BDATReader( filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 8260684c02ea6..8f3c9d6fdb467 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -2,6 +2,7 @@ import datetime as pydt from datetime import datetime, timedelta import functools +from typing import List from dateutil.relativedelta import relativedelta import matplotlib.dates as dates @@ -529,7 +530,7 @@ def _get_default_annual_spacing(nyears): return (min_spacing, maj_spacing) -def period_break(dates, period): +def period_break(dates: PeriodIndex, period): """ Returns the indices where the given period changes. @@ -1052,7 +1053,7 @@ def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): freq = get_freq(freq) self.format = None self.freq = freq - self.locs = [] + self.locs: List[float] = [] self.formatdict = None self.isminor = minor_locator self.isdynamic = dynamic_mode diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index 7319e8de3ec6e..0cfb1a0f75d0b 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -1,4 +1,5 @@ import random +from typing import Dict, Set import matplotlib.lines as mlines import matplotlib.patches as patches @@ -130,7 +131,7 @@ def normalize(series): if ax is None: ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) - to_plot = {} + to_plot: Dict = {} colors = _get_standard_colors( num_colors=len(classes), colormap=colormap, color_type="random", color=color ) @@ -231,7 +232,7 @@ def f(t): classes = frame[class_column].drop_duplicates() df = frame.drop(class_column, axis=1) t = np.linspace(-np.pi, np.pi, samples) - used_legends = set() + used_legends: Set = set() color_values = _get_standard_colors( num_colors=len(classes), colormap=colormap, color_type="random", color=color @@ -332,7 +333,7 @@ def parallel_coordinates( else: df = frame[cols] - used_legends = set() + used_legends: Set = set() ncols = len(df.columns) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 0811f2f822198..59b346de0c390 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1287,7 +1287,7 @@ def test_register_writer(self): class DummyClass(ExcelWriter): called_save = False called_write_cells = False - supported_extensions = ["xlsx", "xls"] + supported_extensions = ("xlsx", "xls") engine = "dummy" def save(self): diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 304033f82c7a2..4fc506554d0b3 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1438,7 +1438,10 @@ def test_period_immutable(): @pytest.mark.xfail( - StrictVersion(dateutil.__version__.split(".dev")[0]) < StrictVersion("2.7.0"), + # 
https://github.com/python/typeshed/tree/master/third_party/2and3/dateutil + # error: Module has no attribute "__version__" [attr-defined] + StrictVersion(dateutil.__version__.split(".dev")[0]) # type: ignore + < StrictVersion("2.7.0"), reason="Bug in dateutil < 2.7.0 when parsing old dates: Period('0001-01-07', 'D')", strict=False, ) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index d2049892705ea..4dca79b3b9257 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -290,19 +290,45 @@ def test_to_datetime_format_weeks(self, cache): [ "%Y-%m-%d %H:%M:%S%z", ["2010-01-01 12:00:00+0100"] * 2, - [pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + [ + pd.Timestamp( + "2010-01-01 12:00:00", + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + tzinfo=pytz.FixedOffset(60), # type:ignore + ) + ] + * 2, ], [ "%Y-%m-%d %H:%M:%S %z", ["2010-01-01 12:00:00 +0100"] * 2, - [pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + [ + pd.Timestamp( + "2010-01-01 12:00:00", + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + tzinfo=pytz.FixedOffset(60), # type:ignore + ) + ] + * 2, ], [ "%Y-%m-%d %H:%M:%S %z", ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"], [ - pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60)), - pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(-60)), + pd.Timestamp( + "2010-01-01 12:00:00", + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + tzinfo=pytz.FixedOffset(60), # type:ignore + ), + pd.Timestamp( + "2010-01-01 12:00:00", + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + tzinfo=pytz.FixedOffset(-60), # type:ignore + ), ], ], [ @@ -310,9 +336,17 @@ def test_to_datetime_format_weeks(self, cache): ["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"], [ pd.Timestamp( - "2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0) + "2010-01-01 12:00:00", + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + tzinfo=pytz.FixedOffset(0), # type:ignore ), # pytz coerces to UTC - pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0)), + pd.Timestamp( + "2010-01-01 12:00:00", + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + tzinfo=pytz.FixedOffset(0), # type:ignore + ), ], ], ], @@ -2164,12 +2198,16 @@ def test_parsers_timestring(self, cache): [ ( "2013-01-01 05:45+0545", - pytz.FixedOffset(345), + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + pytz.FixedOffset(345), # type:ignore "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')", ), ( "2013-01-01 05:30+0530", - pytz.FixedOffset(330), + # https://github.com/python/typeshed/pull/3393 + # error: Module has no attribute "FixedOffset" + pytz.FixedOffset(330), # type:ignore "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')", ), ], diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 8ab37f787bd10..c2c08be7fce47 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -1,5 +1,7 @@ +# mypy: ignore_errors + from datetime import datetime, timedelta -from typing import List +from typing import 
Iterable, List, Optional, Union, cast import warnings from dateutil.relativedelta import FR, MO, SA, SU, TH, TU, WE # noqa @@ -11,6 +13,8 @@ from pandas.tseries.offsets import Day, Easter +_DatetimeLike = Union[datetime, Timestamp, str, float] + def next_monday(dt): """ @@ -217,7 +221,12 @@ def __repr__(self) -> str: repr = f"Holiday: {self.name} ({info})" return repr - def dates(self, start_date, end_date, return_name=False): + def dates( + self, + start_date: Optional[_DatetimeLike], + end_date: Optional[_DatetimeLike], + return_name: bool = False, + ) -> Union[DatetimeIndex, List[Timestamp], Series]: """ Calculate holidays observed between start date and end date @@ -257,6 +266,8 @@ def dates(self, start_date, end_date, return_name=False): filter_end_date = min( self.end_date.tz_localize(filter_end_date.tz), filter_end_date ) + # TODO: comparison ops are created dynamically + # error: Unsupported left operand type for >= ("DatetimeIndex") [operator] holiday_dates = holiday_dates[ (holiday_dates >= filter_start_date) & (holiday_dates <= filter_end_date) ] @@ -264,7 +275,9 @@ def dates(self, start_date, end_date, return_name=False): return Series(self.name, index=holiday_dates) return holiday_dates - def _reference_dates(self, start_date, end_date): + def _reference_dates( + self, start_date: Timestamp, end_date: Timestamp + ) -> DatetimeIndex: """ Get reference dates for the holiday. @@ -297,7 +310,7 @@ def _reference_dates(self, start_date, end_date): return dates - def _apply_rule(self, dates): + def _apply_rule(self, dates: DatetimeIndex) -> DatetimeIndex: """ Apply the given offset/observance to a DatetimeIndex of dates. @@ -363,12 +376,14 @@ class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass): Abstract interface to create holidays following certain rules. """ - rules: List[Holiday] = [] + rules: Iterable[Holiday] = [] start_date = Timestamp(datetime(1970, 1, 1)) end_date = Timestamp(datetime(2200, 12, 31)) _cache = None - def __init__(self, name=None, rules=None): + def __init__( + self, name: Optional[str] = None, rules: Optional[Iterable[Holiday]] = None + ): """ Initializes holiday object with a given set of rules. Normally classes just have the rules defined within them.
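# Widening rules from List[Holiday] to Iterable[Holiday] above matters for
# subclasses: List is invariant and mutable, so overriding the attribute with
# a tuple would not type-check, while the read-only Iterable allows it.
# A minimal sketch with hypothetical Rule/calendar names:
from typing import Iterable


class Rule:
    pass


class BaseCalendar:
    rules: Iterable[Rule] = []


class USCalendar(BaseCalendar):
    rules = (Rule(), Rule())  # fine for Iterable[Rule]; an error for List[Rule]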
@@ -395,7 +410,12 @@ def rule_from_name(self, name): return None - def holidays(self, start=None, end=None, return_name=False): + def holidays( + self, + start: Optional[_DatetimeLike] = None, + end: Optional[_DatetimeLike] = None, + return_name: bool = False, + ) -> Union[DatetimeIndex, Series]: """ Returns a curve with holidays between start_date and end_date @@ -434,15 +454,18 @@ def holidays(self, start=None, end=None, return_name=False): else: holidays = Series(index=DatetimeIndex([]), dtype=object) - self._cache = (start, end, holidays.sort_index()) + # TODO: overload rule.dates(return_name=True) to return Series + self._cache = (start, end, cast(Series, holidays).sort_index()) - holidays = self._cache[2] - holidays = holidays[start:end] + # TODO: concrete subclass must have rules + holidays = cast(Series, self._cache[2]) + # TODO: slice indexing on a Series should return Series + holidays = cast(Series, holidays[start:end]) if return_name: return holidays else: - return holidays.index + return cast(DatetimeIndex, holidays.index) @staticmethod def merge_class(base, other): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index bc20d784c8dee..f6c0ad6507547 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,7 +1,7 @@ from datetime import date, datetime, timedelta import functools import operator -from typing import Any, Optional +from typing import Any, Optional, Tuple import warnings from dateutil.easter import easter @@ -248,6 +248,8 @@ def __add__(date): Timestamp('2017-03-01 09:10:11') """ + # https://github.com/python/mypy/issues/6185 + # error: "Callable[[_BaseOffset], Tuple[Any, ...]]" has no attribute "fget" _params = cache_readonly(BaseOffset._params.fget) _use_relativedelta = False _adjust_dst = False @@ -499,6 +501,10 @@ class _CustomMixin: and weekdays attributes. """ + weekmask: str + holidays: Tuple[np.datetime64, ...] + calendar: np.busdaycalendar + def __init__(self, weekmask, holidays, calendar): calendar, holidays = _get_calendar( weekmask=weekmask, holidays=holidays, calendar=calendar ) @@ -1123,6 +1129,9 @@ class MonthOffset(SingleConstructorOffset): _adjust_dst = True _attributes = frozenset(["n", "normalize"]) + # error: Incompatible types in assignment (expression has type + # "Callable[[_BaseOffset, Any, Any], Any]", base class "object" defined the type + # as "Callable[[object], None]") [assignment] __init__ = BaseOffset.__init__ @property @@ -2468,6 +2477,9 @@ class Easter(DateOffset): _adjust_dst = True _attributes = frozenset(["n", "normalize"]) + # error: Incompatible types in assignment (expression has type + # "Callable[[_BaseOffset, Any, Any], Any]", base class "object" defined the type + # as "Callable[[object], None]") [assignment] __init__ = BaseOffset.__init__ @apply_wraps diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 71d02db10c7ba..d603b983da552 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -100,7 +100,7 @@ def deprecate_kwarg( new_arg_name: Optional[str], mapping: Optional[Union[Mapping[Any, Any], Callable[[Any], Any]]] = None, stacklevel: int = 2, -) -> Callable[..., Any]: +) -> Callable[[F], F]: """ Decorator to deprecate a keyword argument of a function. diff --git a/setup.cfg b/setup.cfg index fda4ba4065e2f..a5a4590ea37e4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,7 +22,13 @@ ignore = C406, # Unnecessary list literal - rewrite as a dict literal. C408, # Unnecessary dict call - rewrite as a literal.
C409, # Unnecessary list passed to tuple() - rewrite as a tuple literal. - S001 # found modulo formatter (incorrect picks up mod operations) + S001, # found modulo formatter (incorrect picks up mod operations) + # pydocstyle + D100, D101, D102, D103, D104, D105, D106, D107, + D200, D202, D203, D204, D205, D207, D209, D212, D213, + D301, + D400, D401, D402, D403, D413, + exclude = doc/sphinxext/*.py, doc/build/*.py, @@ -122,10 +128,42 @@ skip_glob = env, skip = pandas/__init__.py,pandas/core/api.py [mypy] -ignore_missing_imports=True -no_implicit_optional=True -check_untyped_defs=True -strict_equality=True +platform = linux-64 +python_version = 3.6 +ignore_missing_imports = True +show_column_numbers = True +strict_equality = True +show_error_codes = True +# --strict settings +# Strict mode is not supported in configuration files: specify individual flags +# instead (see 'mypy -h' for the list of flags enabled in strict mode) +warn_unused_configs = True +# disallow_subclassing_any = True +# disallow_any_generics = True +# disallow_untyped_calls = True +# disallow_untyped_defs = True +# disallow_incomplete_defs = True +check_untyped_defs = True +# disallow_untyped_decorators = True +no_implicit_optional = True +warn_redundant_casts = True +# warn_unused_ignores = True +# warn_return_any = True +# no_implicit_reexport = True + +files = + pandas/_config, + pandas/_libs, + pandas/api, + pandas/arrays, + pandas/compat, + pandas/core, + pandas/errors, + pandas/io, + pandas/plotting, + pandas/tseries, + pandas/util, + pandas/*.py [mypy-pandas.tests.*] check_untyped_defs=False @@ -133,13 +171,13 @@ check_untyped_defs=False [mypy-pandas.conftest] ignore_errors=True -[mypy-pandas.tests.arithmetic.test_datetime64] +[mypy-pandas.tests.indexes.datetimes.test_tools] ignore_errors=True -[mypy-pandas.tests.tools.test_to_datetime] +[mypy-pandas.tests.scalar.period.test_period] ignore_errors=True -[mypy-pandas.tests.scalar.period.test_period] +[mypy-pandas.io.formats.style] ignore_errors=True [mypy-pandas._testing] @@ -148,135 +186,36 @@ check_untyped_defs=False [mypy-pandas._version] check_untyped_defs=False -[mypy-pandas.core.arrays.interval] -check_untyped_defs=False - -[mypy-pandas.core.base] -check_untyped_defs=False - [mypy-pandas.core.computation.expr] check_untyped_defs=False -[mypy-pandas.core.computation.expressions] -check_untyped_defs=False - -[mypy-pandas.core.computation.pytables] -check_untyped_defs=False - -[mypy-pandas.core.computation.scope] -check_untyped_defs=False - -[mypy-pandas.core.dtypes.cast] -check_untyped_defs=False - -[mypy-pandas.core.frame] -check_untyped_defs=False - -[mypy-pandas.core.generic] -check_untyped_defs=False - -[mypy-pandas.core.groupby.generic] -check_untyped_defs=False - [mypy-pandas.core.groupby.grouper] check_untyped_defs=False -[mypy-pandas.core.groupby.ops] -check_untyped_defs=False - -[mypy-pandas.core.indexes.base] -check_untyped_defs=False - -[mypy-pandas.core.indexes.datetimes] -check_untyped_defs=False - -[mypy-pandas.core.indexes.interval] -check_untyped_defs=False - -[mypy-pandas.core.indexes.multi] -check_untyped_defs=False - [mypy-pandas.core.internals.blocks] check_untyped_defs=False -[mypy-pandas.core.internals.concat] -check_untyped_defs=False - [mypy-pandas.core.internals.construction] check_untyped_defs=False [mypy-pandas.core.internals.managers] check_untyped_defs=False -[mypy-pandas.core.missing] -check_untyped_defs=False - -[mypy-pandas.core.nanops] -check_untyped_defs=False - -[mypy-pandas.core.ops.docstrings] -check_untyped_defs=False - 
[mypy-pandas.core.resample] check_untyped_defs=False -[mypy-pandas.core.reshape.merge] -check_untyped_defs=False - -[mypy-pandas.core.strings] -check_untyped_defs=False - -[mypy-pandas.core.window.common] -check_untyped_defs=False - -[mypy-pandas.core.window.ewm] -check_untyped_defs=False - -[mypy-pandas.core.window.expanding] -check_untyped_defs=False - [mypy-pandas.core.window.rolling] check_untyped_defs=False [mypy-pandas.io.clipboard] check_untyped_defs=False -[mypy-pandas.io.excel._base] -check_untyped_defs=False - [mypy-pandas.io.excel._openpyxl] check_untyped_defs=False -[mypy-pandas.io.excel._util] -check_untyped_defs=False - -[mypy-pandas.io.excel._xlwt] -check_untyped_defs=False - -[mypy-pandas.io.formats.console] -check_untyped_defs=False - -[mypy-pandas.io.formats.css] -check_untyped_defs=False - -[mypy-pandas.io.formats.excel] -check_untyped_defs=False - -[mypy-pandas.io.formats.format] -check_untyped_defs=False - -[mypy-pandas.io.formats.style] -check_untyped_defs=False - -[mypy-pandas.io.html] -check_untyped_defs=False - [mypy-pandas.io.json._json] check_untyped_defs=False -[mypy-pandas.io.json._table_schema] -check_untyped_defs=False - [mypy-pandas.io.parsers] check_untyped_defs=False @@ -286,26 +225,8 @@ check_untyped_defs=False [mypy-pandas.io.sas.sas_xport] check_untyped_defs=False -[mypy-pandas.io.sas.sas7bdat] -check_untyped_defs=False - -[mypy-pandas.io.sas.sasreader] -check_untyped_defs=False - [mypy-pandas.io.stata] check_untyped_defs=False -[mypy-pandas.plotting._matplotlib.converter] -check_untyped_defs=False - [mypy-pandas.plotting._matplotlib.core] check_untyped_defs=False - -[mypy-pandas.plotting._matplotlib.misc] -check_untyped_defs=False - -[mypy-pandas.tseries.holiday] -check_untyped_defs=False - -[mypy-pandas.tseries.offsets] -check_untyped_defs=False
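# On the trimmed overrides above: once a module loses its
# "check_untyped_defs=False" section, the global check_untyped_defs = True
# applies, and mypy starts checking the bodies of unannotated functions it
# previously skipped. A minimal sketch of the kind of bug that surfaces:
def untyped(flag):
    # reported only when check_untyped_defs is on for this module:
    return "total: " + 1  # Unsupported operand types for + ("str" and "int")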