TYP: io broken off from pandas-dev#41059

jbrockmendel · jbrockmendel · commit d90a3a67ff90 · 2021-05-27T14:58:17.000-07:00
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -8,6 +8,7 @@
     Any,
     Callable,
     DefaultDict,
+    Hashable,
     Iterable,
     Sequence,
     cast,
@@ -114,6 +115,8 @@
 class ParserBase:
     _implicit_index: bool = False
     _first_chunk: bool
+    index_col: int | Sequence[int] | None
+    index_names: list[Hashable] | None
 
     def __init__(self, kwds):
 
@@ -123,7 +126,7 @@ def __init__(self, kwds):
 
         self.index_col = kwds.get("index_col", None)
         self.unnamed_cols: set = set()
-        self.index_names: list | None = None
+        self.index_names: list[Hashable] | None = None
         self.col_names = None
 
         self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
@@ -171,8 +174,14 @@ def __init__(self, kwds):
             if self.index_col is not None:
                 is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray))
                 if not (
-                    is_sequence
-                    and all(map(is_integer, self.index_col))
+                    # error: Argument 2 to "map" has incompatible type
+                    # "Union[int, Sequence[int]]"; expected "Iterable[int]"
+                    (
+                        is_sequence
+                        and all(
+                            map(is_integer, self.index_col)  # type: ignore[arg-type]
+                        )
+                    )
                     or is_integer(self.index_col)
                 ):
                     raise ValueError(
@@ -287,8 +296,12 @@ def _should_parse_dates(self, i: int) -> bool:
                 name = self.index_names[i]
             else:
                 name = None
-            j = i if self.index_col is None else self.index_col[i]
-
+            # error: Value of type "Union[int, Sequence[int]]" is not indexable
+            j = (
+                i
+                if self.index_col is None
+                else self.index_col[i]  # type: ignore[index]
+            )
             if is_scalar(self.parse_dates):
                 return (j == self.parse_dates) or (
                     name is not None and name == self.parse_dates
@@ -317,7 +330,9 @@ def _extract_multi_indexer_columns(
             ic = []
 
         if not isinstance(ic, (list, tuple, np.ndarray)):
-            ic = [ic]
+            # error: List item 0 has incompatible type
+            # "Union[int, Sequence[int]]"; expected "int"
+            ic = [ic]  # type: ignore[list-item]
         sic = set(ic)
 
         # clean the index_names
@@ -333,7 +348,9 @@ def extract(r):
             return tuple(r[i] for i in range(field_count) if i not in sic)
 
         columns = list(zip(*(extract(r) for r in header)))
-        names = ic + columns
+        # error: No overload variant of "__add__" of "tuple" matches argument
+        # type "List[Any]"
+        names = ic + columns  # type: ignore[operator]
 
         # If we find unnamed columns all in a single
         # level, then our header was too long.
@@ -368,7 +385,12 @@ def _maybe_dedup_names(self, names):
         if self.mangle_dupe_cols:
             names = list(names)  # so we can index
             counts: DefaultDict[int | str | tuple, int] = defaultdict(int)
-            is_potential_mi = _is_potential_multi_index(names, self.index_col)
+            # error: Argument 2 to "_is_potential_multi_index" has incompatible
+            # type "Union[int, Sequence[int], None]"; expected
+            # "Union[bool, Sequence[int], None]"
+            is_potential_mi = _is_potential_multi_index(
+                names, self.index_col  # type: ignore[arg-type]
+            )
 
             for i, col in enumerate(names):
                 cur_count = counts[col]
@@ -431,7 +453,11 @@ def ix(col):
 
         to_remove = []
         index = []
-        for idx in self.index_col:
+        # error: Item "int" of "Union[int, Sequence[int], None]" has no
+        # attribute "__iter__" (not iterable)
+        # error: Item "None" of "Union[int, Sequence[int], None]" has no
+        # attribute "__iter__" (not iterable)
+        for idx in self.index_col:  # type: ignore[union-attr]
             i = ix(idx)
             to_remove.append(i)
             index.append(data[i])
@@ -460,7 +486,11 @@ def _get_name(icol):
 
         to_remove = []
         index = []
-        for idx in self.index_col:
+        # error: Item "int" of "Union[int, Sequence[int], None]" has no
+        # attribute "__iter__" (not iterable)
+        # error: Item "None" of "Union[int, Sequence[int], None]" has no
+        # attribute "__iter__" (not iterable)
+        for idx in self.index_col:  # type: ignore[union-attr]
             name = _get_name(idx)
             to_remove.append(name)
             index.append(data[name])
diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
@@ -39,10 +39,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
         self.low_memory = kwds.pop("low_memory", False)
 
         # #2442
-        # error: Cannot determine type of 'index_col'
-        kwds["allow_leading_cols"] = (
-            self.index_col is not False  # type: ignore[has-type]
-        )
+        kwds["allow_leading_cols"] = self.index_col is not False
 
         # GH20529, validate usecol arg before TextReader
         kwds["usecols"] = self.usecols
@@ -71,7 +68,6 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
             if len(self._reader.header) > 1:
                 # we have a multi index in the columns
                 # error: Cannot determine type of 'names'
-                # error: Cannot determine type of 'index_names'
                 # error: Cannot determine type of 'col_names'
                 (
                     self.names,  # type: ignore[has-type]
@@ -80,7 +76,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
                     passed_names,
                 ) = self._extract_multi_indexer_columns(
                     self._reader.header,
-                    self.index_names,  # type: ignore[has-type]
+                    self.index_names,
                     self.col_names,  # type: ignore[has-type]
                     passed_names,
                 )
@@ -149,10 +145,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
         self.orig_names = self.names  # type: ignore[has-type]
 
         if not self._has_complex_date_col:
-            # error: Cannot determine type of 'index_col'
-            if self._reader.leading_cols == 0 and is_index_col(
-                self.index_col  # type: ignore[has-type]
-            ):
+            if self._reader.leading_cols == 0 and is_index_col(self.index_col):
 
                 self._name_processed = True
                 (
@@ -163,8 +156,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
                 ) = self._clean_index_names(
                     # error: Cannot determine type of 'names'
                     self.names,  # type: ignore[has-type]
-                    # error: Cannot determine type of 'index_col'
-                    self.index_col,  # type: ignore[has-type]
+                    self.index_col,
                     self.unnamed_cols,
                 )
 
@@ -258,7 +250,8 @@ def read(self, nrows=None):
                 if self.index_col is None:
                     values = data.pop(i)
                 else:
-                    values = data.pop(self.index_col[i])
+                    # error: Value of type "Union[int, Sequence[int]]" is not indexable
+                    values = data.pop(self.index_col[i])  # type: ignore[index]
 
                 values = self._maybe_parse_dates(values, i, try_parse_dates=True)
                 arrays.append(values)
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -123,7 +123,6 @@ def __init__(self, f: Union[FilePathOrBuffer, list], **kwds):
         # The original set is stored in self.original_columns.
         if len(self.columns) > 1:
             # we are processing a multi index column
-            # error: Cannot determine type of 'index_names'
             # error: Cannot determine type of 'col_names'
             (
                 self.columns,
@@ -132,7 +131,7 @@ def __init__(self, f: Union[FilePathOrBuffer, list], **kwds):
                 _,
             ) = self._extract_multi_indexer_columns(
                 self.columns,
-                self.index_names,  # type: ignore[has-type]
+                self.index_names,
                 self.col_names,  # type: ignore[has-type]
             )
             # Update list of original names to include all indices.
@@ -262,10 +261,9 @@ def read(self, rows=None):
         if not len(content):  # pragma: no cover
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
-            # error: Cannot determine type of 'index_col'
             index, columns, col_dict = self._get_empty_meta(
                 names,
-                self.index_col,  # type: ignore[has-type]
+                self.index_col,
                 self.index_names,
                 self.dtype,
             )
@@ -294,8 +292,9 @@ def _exclude_implicit_index(self, alldata):
 
         offset = 0
         if self._implicit_index:
-            # error: Cannot determine type of 'index_col'
-            offset = len(self.index_col)  # type: ignore[has-type]
+            # error: Argument 1 to "len" has incompatible type
+            # "Union[int, Sequence[int], None]"; expected "Sized"
+            offset = len(self.index_col)  # type: ignore[arg-type]
 
         len_alldata = len(alldata)
         return {
@@ -444,9 +443,12 @@ def _infer_columns(self):
                     # line for the rest of the parsing code
                     if hr == header[-1]:
                         lc = len(this_columns)
-                        # error: Cannot determine type of 'index_col'
-                        sic = self.index_col  # type: ignore[has-type]
-                        ic = len(sic) if sic is not None else 0
+                        sic = self.index_col
+                        # error: Argument 1 to "len" has incompatible type
+                        # "Union[int, Sequence[int]]"; expected "Sized"
+                        ic = (
+                            len(sic) if sic is not None else 0  # type: ignore[arg-type]
+                        )
                         unnamed_count = len(this_unnamed_cols)
 
                         # if wrong number of blanks or no index, not our format
@@ -881,8 +883,7 @@ def _get_index_name(self, columns):
         if line is not None:
             # leave it 0, #2442
             # Case 1
-            # error: Cannot determine type of 'index_col'
-            index_col = self.index_col  # type: ignore[has-type]
+            index_col = self.index_col
             if index_col is not False:
                 implicit_first_cols = len(line) - self.num_original_columns
 
@@ -921,20 +922,16 @@ def _rows_to_cols(self, content):
         col_len = self.num_original_columns
 
         if self._implicit_index:
-            col_len += len(self.index_col)
+            # error: Argument 1 to "len" has incompatible type
+            # "Union[int, Sequence[int]]"; expected "Sized"
+            col_len += len(self.index_col)  # type: ignore[arg-type]
 
         max_len = max(len(row) for row in content)
 
         # Check that there are no rows with too many
         # elements in their row (rows with too few
         # elements are padded with NaN).
-        # error: Non-overlapping identity check (left operand type: "List[int]",
-        # right operand type: "Literal[False]")
-        if (
-            max_len > col_len
-            and self.index_col is not False  # type: ignore[comparison-overlap]
-            and self.usecols is None
-        ):
+        if max_len > col_len and self.index_col is not False and self.usecols is None:
 
             footers = self.skipfooter if self.skipfooter else 0
             bad_lines = []
@@ -983,13 +980,13 @@ def _rows_to_cols(self, content):
             col_indices = self._col_indices
 
             if self._implicit_index:
+                # error: Argument 1 to "len" has incompatible type
+                # "Union[int, Sequence[int]]"; expected "Sized"
+                lic = len(self.index_col)  # type: ignore[arg-type]
                 zipped_content = [
                     a
                     for i, a in enumerate(zipped_content)
-                    if (
-                        i < len(self.index_col)
-                        or i - len(self.index_col) in col_indices
-                    )
+                    if (i < lic or i - lic in col_indices)
                 ]
             else:
                 zipped_content = [