diff --git a/pandas/core/base.py b/pandas/core/base.py index 10b83116dee58..6af537dcd149a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,12 +4,22 @@ import builtins import textwrap -from typing import Any, Callable, Dict, FrozenSet, Optional, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + FrozenSet, + Optional, + TypeVar, + Union, + cast, +) import numpy as np import pandas._libs.lib as lib -from pandas._typing import IndexLabel +from pandas._typing import DtypeObj, IndexLabel from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -33,6 +43,9 @@ from pandas.core.construction import create_series_with_explicit_dtype import pandas.core.nanops as nanops +if TYPE_CHECKING: + from pandas import Categorical + _shared_docs: Dict[str, str] = dict() _indexops_doc_kwargs = dict( klass="IndexOpsMixin", @@ -238,7 +251,7 @@ def _gotitem(self, key, ndim: int, subset=None): Parameters ---------- key : str / list of selections - ndim : 1,2 + ndim : {1, 2} requested ndim of result subset : object, default None subset to act on @@ -305,6 +318,11 @@ class IndexOpsMixin(OpsMixin): ["tolist"] # tolist is not deprecated, just suppressed in the __dir__ ) + @property + def dtype(self) -> DtypeObj: + # must be defined here as a property for mypy + raise AbstractMethodError(self) + @property def _values(self) -> Union[ExtensionArray, np.ndarray]: # must be defined here as a property for mypy @@ -832,6 +850,7 @@ def _map_values(self, mapper, na_action=None): if is_categorical_dtype(self.dtype): # use the built in categorical series mapper which saves # time by mapping the categories instead of all values + self = cast("Categorical", self) return self._values.map(mapper) values = self._values diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 8038bc6bf1c72..a5e8edca80873 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -164,6 +164,7 @@ class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate): codes: np.ndarray categories: Index _data: Categorical + _values: Categorical @property def _engine_type(self): diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 604b7e12ec243..3461652f4ea24 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -785,14 +785,19 @@ def _value_with_fmt(self, val): return val, fmt @classmethod - def check_extension(cls, ext): + def check_extension(cls, ext: str): """ checks that path's extension against the Writer's supported extensions. If it isn't supported, raises UnsupportedFiletypeError. """ if ext.startswith("."): ext = ext[1:] - if not any(ext in extension for extension in cls.supported_extensions): + # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__" + # (not iterable) [attr-defined] + if not any( + ext in extension + for extension in cls.supported_extensions # type: ignore[attr-defined] + ): raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'") else: return True diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3e4780ec21378..2977a78f02785 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1835,9 +1835,11 @@ def _make_fixed_width( return strings if adj is None: - adj = get_adjustment() + adjustment = get_adjustment() + else: + adjustment = adj - max_len = max(adj.len(x) for x in strings) + max_len = max(adjustment.len(x) for x in strings) if minimum is not None: max_len = max(minimum, max_len) @@ -1846,14 +1848,14 @@ def _make_fixed_width( if conf_max is not None and max_len > conf_max: max_len = conf_max - def just(x): + def just(x: str) -> str: if conf_max is not None: - if (conf_max > 3) & (adj.len(x) > max_len): + if (conf_max > 3) & (adjustment.len(x) > max_len): x = x[: max_len - 3] + "..." return x strings = [just(x) for x in strings] - result = adj.justify(strings, max_len, mode=justify) + result = adjustment.justify(strings, max_len, mode=justify) return result diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 63c3f9899d915..1184b0436b93d 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1661,7 +1661,7 @@ def _get_name(icol): return index - def _agg_index(self, index, try_parse_dates=True): + def _agg_index(self, index, try_parse_dates=True) -> Index: arrays = [] for i, arr in enumerate(index): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2903ede1d5c0b..5160773455067 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -565,6 +565,7 @@ def __fspath__(self): def root(self): """ return the root node """ self._check_if_open() + assert self._handle is not None # for mypy return self._handle.root @property @@ -1393,6 +1394,8 @@ def groups(self): """ _tables() self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy return [ g for g in self._handle.walk_groups() @@ -1437,6 +1440,9 @@ def walk(self, where="/"): """ _tables() self._check_if_open() + assert self._handle is not None # for mypy + assert _table_mod is not None # for mypy + for g in self._handle.walk_groups(where): if getattr(g._v_attrs, "pandas_type", None) is not None: continue @@ -1862,6 +1868,8 @@ def __init__( def __iter__(self): # iterate current = self.start + if self.coordinates is None: + raise ValueError("Cannot iterate until get_result is called.") while current < self.stop: stop = min(current + self.chunksize, self.stop) value = self.func(None, None, self.coordinates[current:stop]) @@ -3196,7 +3204,7 @@ class Table(Fixed): pandas_kind = "wide_table" format_type: str = "table" # GH#30962 needed by dask table_type: str - levels = 1 + levels: Union[int, List[Label]] = 1 is_table = True index_axes: List[IndexCol] @@ -3292,7 +3300,9 @@ def is_multi_index(self) -> bool: """the levels attribute is 1 or a list in the case of a multi-index""" return isinstance(self.levels, list) - def validate_multiindex(self, obj): + def validate_multiindex( + self, obj: FrameOrSeriesUnion + ) -> Tuple[DataFrame, List[Label]]: """ validate that we can store the multi-index; reset and return the new object @@ -3301,11 +3311,13 @@ def validate_multiindex(self, obj): l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names) ] try: - return obj.reset_index(), levels + reset_obj = obj.reset_index() except ValueError as err: raise ValueError( "duplicate names/columns in the multi-index when storing as a table" ) from err + assert isinstance(reset_obj, DataFrame) # for mypy + return reset_obj, levels @property def nrows_expected(self) -> int: @@ -3433,7 +3445,7 @@ def get_attrs(self): self.nan_rep = getattr(self.attrs, "nan_rep", None) self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) - self.levels = getattr(self.attrs, "levels", None) or [] + self.levels: List[Label] = getattr(self.attrs, "levels", None) or [] self.index_axes = [a for a in self.indexables if a.is_an_indexable] self.values_axes = [a for a in self.indexables if not a.is_an_indexable] @@ -4562,11 +4574,12 @@ class AppendableMultiSeriesTable(AppendableSeriesTable): def write(self, obj, **kwargs): """ we are going to write this as a frame table """ name = obj.name or "values" - obj, self.levels = self.validate_multiindex(obj) + newobj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy cols = list(self.levels) cols.append(name) - obj.columns = cols - return super().write(obj=obj, **kwargs) + newobj.columns = Index(cols) + return super().write(obj=newobj, **kwargs) class GenericTable(AppendableFrameTable): @@ -4576,6 +4589,7 @@ class GenericTable(AppendableFrameTable): table_type = "generic_table" ndim = 2 obj_type = DataFrame + levels: List[Label] @property def pandas_type(self) -> str: @@ -4609,7 +4623,7 @@ def indexables(self): name="index", axis=0, table=self.table, meta=meta, metadata=md ) - _indexables = [index_col] + _indexables: List[Union[GenericIndexCol, GenericDataIndexableCol]] = [index_col] for i, n in enumerate(d._v_names): assert isinstance(n, str) @@ -4652,6 +4666,7 @@ def write(self, obj, data_columns=None, **kwargs): elif data_columns is True: data_columns = obj.columns.tolist() obj, self.levels = self.validate_multiindex(obj) + assert isinstance(self.levels, list) # for mypy for n in self.levels: if n not in data_columns: data_columns.insert(0, n) @@ -5173,7 +5188,7 @@ def select_coords(self): start = 0 elif start < 0: start += nrows - if self.stop is None: + if stop is None: stop = nrows elif stop < 0: stop += nrows diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 55dde374048b6..c128c56f496cc 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -378,8 +378,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): d["delta"] = time_delta._values.astype(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) - d["year"] = date_index.year - d["month"] = date_index.month + d["year"] = date_index._data.year + d["month"] = date_index._data.month if days: days_in_ns = dates.astype(np.int64) - to_datetime( d["year"], format="%Y" @@ -887,7 +887,9 @@ def __init__(self): (65530, np.int8), ] ) - self.TYPE_MAP = list(range(251)) + list("bhlfd") + # error: Argument 1 to "list" has incompatible type "str"; + # expected "Iterable[int]" [arg-type] + self.TYPE_MAP = list(range(251)) + list("bhlfd") # type: ignore[arg-type] self.TYPE_MAP_XML = dict( [ # Not really a Q, unclear how to handle byteswap diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f806325d60eca..a69767df267fc 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -82,6 +82,8 @@ def _kind(self): _default_rot = 0 orientation: Optional[str] = None + axes: np.ndarray # of Axes objects + def __init__( self, data, @@ -177,7 +179,7 @@ def __init__( self.ax = ax self.fig = fig - self.axes = None + self.axes = np.array([], dtype=object) # "real" version get set in `generate` # parse errorbar input if given xerr = kwds.pop("xerr", None) @@ -697,7 +699,7 @@ def _get_ax_layer(cls, ax, primary=True): else: return getattr(ax, "right_ax", ax) - def _get_ax(self, i): + def _get_ax(self, i: int): # get the twinx ax if appropriate if self.subplots: ax = self.axes[i] diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 832957dd73ec7..bec1f48f5e64a 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -401,11 +401,11 @@ def handle_shared_axes( _remove_labels_from_axis(ax.yaxis) -def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> Sequence["Axes"]: +def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> np.ndarray: if not is_list_like(axes): return np.array([axes]) elif isinstance(axes, (np.ndarray, ABCIndexClass)): - return axes.ravel() + return np.asarray(axes).ravel() return np.array(axes) diff --git a/setup.cfg b/setup.cfg index 447adc9188951..ad72374fa325d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -223,12 +223,6 @@ check_untyped_defs=False [mypy-pandas.io.parsers] check_untyped_defs=False -[mypy-pandas.io.pytables] -check_untyped_defs=False - -[mypy-pandas.io.stata] -check_untyped_defs=False - [mypy-pandas.plotting._matplotlib.converter] check_untyped_defs=False