TYP: mostly io, plotting (#37059)

jbrockmendel · web-flow · commit 3d29aeee1387 · 2020-10-14T08:27:18.000-04:00
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -4,12 +4,22 @@
 
 import builtins
 import textwrap
-from typing import Any, Callable, Dict, FrozenSet, Optional, TypeVar, Union
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    FrozenSet,
+    Optional,
+    TypeVar,
+    Union,
+    cast,
+)
 
 import numpy as np
 
 import pandas._libs.lib as lib
-from pandas._typing import IndexLabel
+from pandas._typing import DtypeObj, IndexLabel
 from pandas.compat import PYPY
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
@@ -33,6 +43,9 @@
 from pandas.core.construction import create_series_with_explicit_dtype
 import pandas.core.nanops as nanops
 
+if TYPE_CHECKING:
+    from pandas import Categorical
+
 _shared_docs: Dict[str, str] = dict()
 _indexops_doc_kwargs = dict(
     klass="IndexOpsMixin",
@@ -238,7 +251,7 @@ def _gotitem(self, key, ndim: int, subset=None):
         Parameters
         ----------
         key : str / list of selections
-        ndim : 1,2
+        ndim : {1, 2}
             requested ndim of result
         subset : object, default None
             subset to act on
@@ -305,6 +318,11 @@ class IndexOpsMixin(OpsMixin):
         ["tolist"]  # tolist is not deprecated, just suppressed in the __dir__
     )
 
+    @property
+    def dtype(self) -> DtypeObj:
+        # must be defined here as a property for mypy
+        raise AbstractMethodError(self)
+
     @property
     def _values(self) -> Union[ExtensionArray, np.ndarray]:
         # must be defined here as a property for mypy
@@ -832,6 +850,7 @@ def _map_values(self, mapper, na_action=None):
             if is_categorical_dtype(self.dtype):
                 # use the built in categorical series mapper which saves
                 # time by mapping the categories instead of all values
+                self = cast("Categorical", self)
                 return self._values.map(mapper)
 
             values = self._values
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -164,6 +164,7 @@ class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate):
     codes: np.ndarray
     categories: Index
     _data: Categorical
+    _values: Categorical
 
     @property
     def _engine_type(self):
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -785,14 +785,19 @@ def _value_with_fmt(self, val):
         return val, fmt
 
     @classmethod
-    def check_extension(cls, ext):
+    def check_extension(cls, ext: str):
         """
         checks that path's extension against the Writer's supported
         extensions.  If it isn't supported, raises UnsupportedFiletypeError.
         """
         if ext.startswith("."):
             ext = ext[1:]
-        if not any(ext in extension for extension in cls.supported_extensions):
+        # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__"
+        #  (not iterable)  [attr-defined]
+        if not any(
+            ext in extension
+            for extension in cls.supported_extensions  # type: ignore[attr-defined]
+        ):
             raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'")
         else:
             return True
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1835,9 +1835,11 @@ def _make_fixed_width(
         return strings
 
     if adj is None:
-        adj = get_adjustment()
+        adjustment = get_adjustment()
+    else:
+        adjustment = adj
 
-    max_len = max(adj.len(x) for x in strings)
+    max_len = max(adjustment.len(x) for x in strings)
 
     if minimum is not None:
         max_len = max(minimum, max_len)
@@ -1846,14 +1848,14 @@ def _make_fixed_width(
     if conf_max is not None and max_len > conf_max:
         max_len = conf_max
 
-    def just(x):
+    def just(x: str) -> str:
         if conf_max is not None:
-            if (conf_max > 3) & (adj.len(x) > max_len):
+            if (conf_max > 3) & (adjustment.len(x) > max_len):
                 x = x[: max_len - 3] + "..."
         return x
 
     strings = [just(x) for x in strings]
-    result = adj.justify(strings, max_len, mode=justify)
+    result = adjustment.justify(strings, max_len, mode=justify)
     return result
 
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1661,7 +1661,7 @@ def _get_name(icol):
 
         return index
 
-    def _agg_index(self, index, try_parse_dates=True):
+    def _agg_index(self, index, try_parse_dates=True) -> Index:
         arrays = []
 
         for i, arr in enumerate(index):
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -565,6 +565,7 @@ def __fspath__(self):
     def root(self):
         """ return the root node """
         self._check_if_open()
+        assert self._handle is not None  # for mypy
         return self._handle.root
 
     @property
@@ -1393,6 +1394,8 @@ def groups(self):
         """
         _tables()
         self._check_if_open()
+        assert self._handle is not None  # for mypy
+        assert _table_mod is not None  # for mypy
         return [
             g
             for g in self._handle.walk_groups()
@@ -1437,6 +1440,9 @@ def walk(self, where="/"):
         """
         _tables()
         self._check_if_open()
+        assert self._handle is not None  # for mypy
+        assert _table_mod is not None  # for mypy
+
         for g in self._handle.walk_groups(where):
             if getattr(g._v_attrs, "pandas_type", None) is not None:
                 continue
@@ -1862,6 +1868,8 @@ def __init__(
     def __iter__(self):
         # iterate
         current = self.start
+        if self.coordinates is None:
+            raise ValueError("Cannot iterate until get_result is called.")
         while current < self.stop:
             stop = min(current + self.chunksize, self.stop)
             value = self.func(None, None, self.coordinates[current:stop])
@@ -3196,7 +3204,7 @@ class Table(Fixed):
     pandas_kind = "wide_table"
     format_type: str = "table"  # GH#30962 needed by dask
     table_type: str
-    levels = 1
+    levels: Union[int, List[Label]] = 1
     is_table = True
 
     index_axes: List[IndexCol]
@@ -3292,7 +3300,9 @@ def is_multi_index(self) -> bool:
         """the levels attribute is 1 or a list in the case of a multi-index"""
         return isinstance(self.levels, list)
 
-    def validate_multiindex(self, obj):
+    def validate_multiindex(
+        self, obj: FrameOrSeriesUnion
+    ) -> Tuple[DataFrame, List[Label]]:
         """
         validate that we can store the multi-index; reset and return the
         new object
@@ -3301,11 +3311,13 @@ def validate_multiindex(self, obj):
             l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names)
         ]
         try:
-            return obj.reset_index(), levels
+            reset_obj = obj.reset_index()
         except ValueError as err:
             raise ValueError(
                 "duplicate names/columns in the multi-index when storing as a table"
             ) from err
+        assert isinstance(reset_obj, DataFrame)  # for mypy
+        return reset_obj, levels
 
     @property
     def nrows_expected(self) -> int:
@@ -3433,7 +3445,7 @@ def get_attrs(self):
         self.nan_rep = getattr(self.attrs, "nan_rep", None)
         self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None))
         self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict"))
-        self.levels = getattr(self.attrs, "levels", None) or []
+        self.levels: List[Label] = getattr(self.attrs, "levels", None) or []
         self.index_axes = [a for a in self.indexables if a.is_an_indexable]
         self.values_axes = [a for a in self.indexables if not a.is_an_indexable]
 
@@ -4562,11 +4574,12 @@ class AppendableMultiSeriesTable(AppendableSeriesTable):
     def write(self, obj, **kwargs):
         """ we are going to write this as a frame table """
         name = obj.name or "values"
-        obj, self.levels = self.validate_multiindex(obj)
+        newobj, self.levels = self.validate_multiindex(obj)
+        assert isinstance(self.levels, list)  # for mypy
         cols = list(self.levels)
         cols.append(name)
-        obj.columns = cols
-        return super().write(obj=obj, **kwargs)
+        newobj.columns = Index(cols)
+        return super().write(obj=newobj, **kwargs)
 
 
 class GenericTable(AppendableFrameTable):
@@ -4576,6 +4589,7 @@ class GenericTable(AppendableFrameTable):
     table_type = "generic_table"
     ndim = 2
     obj_type = DataFrame
+    levels: List[Label]
 
     @property
     def pandas_type(self) -> str:
@@ -4609,7 +4623,7 @@ def indexables(self):
             name="index", axis=0, table=self.table, meta=meta, metadata=md
         )
 
-        _indexables = [index_col]
+        _indexables: List[Union[GenericIndexCol, GenericDataIndexableCol]] = [index_col]
 
         for i, n in enumerate(d._v_names):
             assert isinstance(n, str)
@@ -4652,6 +4666,7 @@ def write(self, obj, data_columns=None, **kwargs):
         elif data_columns is True:
             data_columns = obj.columns.tolist()
         obj, self.levels = self.validate_multiindex(obj)
+        assert isinstance(self.levels, list)  # for mypy
         for n in self.levels:
             if n not in data_columns:
                 data_columns.insert(0, n)
@@ -5173,7 +5188,7 @@ def select_coords(self):
             start = 0
         elif start < 0:
             start += nrows
-        if self.stop is None:
+        if stop is None:
             stop = nrows
         elif stop < 0:
             stop += nrows
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -378,8 +378,8 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
                 d["delta"] = time_delta._values.astype(np.int64) // 1000  # microseconds
             if days or year:
                 date_index = DatetimeIndex(dates)
-                d["year"] = date_index.year
-                d["month"] = date_index.month
+                d["year"] = date_index._data.year
+                d["month"] = date_index._data.month
             if days:
                 days_in_ns = dates.astype(np.int64) - to_datetime(
                     d["year"], format="%Y"
@@ -887,7 +887,9 @@ def __init__(self):
                 (65530, np.int8),
             ]
         )
-        self.TYPE_MAP = list(range(251)) + list("bhlfd")
+        # error: Argument 1 to "list" has incompatible type "str";
+        #  expected "Iterable[int]"  [arg-type]
+        self.TYPE_MAP = list(range(251)) + list("bhlfd")  # type: ignore[arg-type]
         self.TYPE_MAP_XML = dict(
             [
                 # Not really a Q, unclear how to handle byteswap
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
@@ -82,6 +82,8 @@ def _kind(self):
     _default_rot = 0
     orientation: Optional[str] = None
 
+    axes: np.ndarray  # of Axes objects
+
     def __init__(
         self,
         data,
@@ -177,7 +179,7 @@ def __init__(
 
         self.ax = ax
         self.fig = fig
-        self.axes = None
+        self.axes = np.array([], dtype=object)  # "real" version get set in `generate`
 
         # parse errorbar input if given
         xerr = kwds.pop("xerr", None)
@@ -697,7 +699,7 @@ def _get_ax_layer(cls, ax, primary=True):
         else:
             return getattr(ax, "right_ax", ax)
 
-    def _get_ax(self, i):
+    def _get_ax(self, i: int):
         # get the twinx ax if appropriate
         if self.subplots:
             ax = self.axes[i]
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py
@@ -401,11 +401,11 @@ def handle_shared_axes(
                     _remove_labels_from_axis(ax.yaxis)
 
 
-def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> Sequence["Axes"]:
+def flatten_axes(axes: Union["Axes", Sequence["Axes"]]) -> np.ndarray:
     if not is_list_like(axes):
         return np.array([axes])
     elif isinstance(axes, (np.ndarray, ABCIndexClass)):
-        return axes.ravel()
+        return np.asarray(axes).ravel()
     return np.array(axes)
 
 
diff --git a/setup.cfg b/setup.cfg
@@ -223,12 +223,6 @@ check_untyped_defs=False
 [mypy-pandas.io.parsers]
 check_untyped_defs=False
 
-[mypy-pandas.io.pytables]
-check_untyped_defs=False
-
-[mypy-pandas.io.stata]
-check_untyped_defs=False
-
 [mypy-pandas.plotting._matplotlib.converter]
 check_untyped_defs=False