TYP: Fix mypy ignores in parsers (#39342)

phofl · web-flow · commit ca4f20434c2a · 2021-02-04T10:55:16.000-05:00
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -2,7 +2,19 @@
 import csv
 import datetime
 import itertools
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Union, cast
+from typing import (
+    Any,
+    DefaultDict,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+    cast,
+)
 import warnings
 
 import numpy as np
@@ -335,9 +347,7 @@ def _maybe_dedup_names(self, names):
         # would be nice!
         if self.mangle_dupe_cols:
             names = list(names)  # so we can index
-            # pandas\io\parsers.py:1559: error: Need type annotation for
-            # 'counts'  [var-annotated]
-            counts = defaultdict(int)  # type: ignore[var-annotated]
+            counts: DefaultDict[Union[int, str, Tuple], int] = defaultdict(int)
             is_potential_mi = _is_potential_multi_index(names, self.index_col)
 
             for i, col in enumerate(names):
@@ -382,9 +392,8 @@ def _make_index(self, data, alldata, columns, indexnamerow=False):
         # add names for the index
         if indexnamerow:
             coffset = len(indexnamerow) - len(columns)
-            # pandas\io\parsers.py:1604: error: Item "None" of "Optional[Any]"
-            # has no attribute "set_names"  [union-attr]
-            index = index.set_names(indexnamerow[:coffset])  # type: ignore[union-attr]
+            assert index is not None
+            index = index.set_names(indexnamerow[:coffset])
 
         # maybe create a mi on the columns
         columns = self._maybe_make_multi_index_columns(columns, self.col_names)
@@ -458,9 +467,8 @@ def _agg_index(self, index, try_parse_dates=True) -> Index:
                 col_na_fvalues = set()
 
             if isinstance(self.na_values, dict):
-                # pandas\io\parsers.py:1678: error: Value of type
-                # "Optional[Any]" is not indexable  [index]
-                col_name = self.index_names[i]  # type: ignore[index]
+                assert self.index_names is not None
+                col_name = self.index_names[i]
                 if col_name is not None:
                     col_na_values, col_na_fvalues = _get_na_values(
                         col_name, self.na_values, self.na_fvalues, self.keep_default_na
@@ -549,7 +557,7 @@ def _convert_to_ndarrays(
         return result
 
     def _set_noconvert_dtype_columns(
-        self, col_indices: List[int], names: List[Union[int, str]]
+        self, col_indices: List[int], names: List[Union[int, str, Tuple]]
     ) -> Set[int]:
         """
         Set the columns that should not undergo dtype conversions.
@@ -850,7 +858,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
             return [None] * len(index_col), columns, index_col
 
         cp_cols = list(columns)
-        index_names = []
+        index_names: List[Optional[Union[int, str]]] = []
 
         # don't mutate
         index_col = list(index_col)
@@ -871,10 +879,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
         # Only clean index names that were placeholders.
         for i, name in enumerate(index_names):
             if isinstance(name, str) and name in unnamed_cols:
-                # pandas\io\parsers.py:3445: error: No overload variant of
-                # "__setitem__" of "list" matches argument types "int", "None"
-                # [call-overload]
-                index_names[i] = None  # type: ignore[call-overload]
+                index_names[i] = None
 
         return index_names, columns, index_col
 
diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py
@@ -131,12 +131,8 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
                     self.index_names = index_names
 
             if self._reader.header is None and not passed_names:
-                # pandas\io\parsers.py:1997: error: Argument 1 to "len" has
-                # incompatible type "Optional[Any]"; expected "Sized"
-                # [arg-type]
-                self.index_names = [None] * len(
-                    self.index_names  # type: ignore[arg-type]
-                )
+                assert self.index_names is not None
+                self.index_names = [None] * len(self.index_names)
 
         self._implicit_index = self._reader.leading_cols > 0
 
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -3,7 +3,7 @@
 from io import StringIO
 import re
 import sys
-from typing import Iterator, List, Optional, Set, cast
+from typing import DefaultDict, Iterator, List, Optional, Set, Tuple, cast
 
 import numpy as np
 
@@ -118,7 +118,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             self.columns = self.columns[0]
 
         # get popped off for index
-        self.orig_names = list(self.columns)
+        self.orig_names: List[Union[int, str, Tuple]] = list(self.columns)
 
         # needs to be cleaned/refactored
         # multiple date column thing turning into a real spaghetti factory
@@ -236,10 +236,7 @@ def read(self, rows=None):
         # done with first read, next time raise StopIteration
         self._first_chunk = False
 
-        # pandas\io\parsers.py:2480: error: Argument 1 to "list" has
-        # incompatible type "Optional[Any]"; expected "Iterable[Any]"
-        # [arg-type]
-        columns = list(self.orig_names)  # type: ignore[arg-type]
+        columns = list(self.orig_names)
         if not len(content):  # pragma: no cover
             # DataFrame with the right metadata, even though it's length 0
             names = self._maybe_dedup_names(self.orig_names)
@@ -292,15 +289,8 @@ def _clean_mapping(mapping):
             """converts col numbers to names"""
             clean = {}
             for col, v in mapping.items():
-                # pandas\io\parsers.py:2537: error: Unsupported right operand
-                # type for in ("Optional[Any]")  [operator]
-                if (
-                    isinstance(col, int)
-                    and col not in self.orig_names  # type: ignore[operator]
-                ):
-                    # pandas\io\parsers.py:2538: error: Value of type
-                    # "Optional[Any]" is not indexable  [index]
-                    col = self.orig_names[col]  # type: ignore[index]
+                if isinstance(col, int) and col not in self.orig_names:
+                    col = self.orig_names[col]
                 clean[col] = v
             return clean
 
@@ -320,15 +310,8 @@ def _clean_mapping(mapping):
                 na_value = self.na_values[col]
                 na_fvalue = self.na_fvalues[col]
 
-                # pandas\io\parsers.py:2558: error: Unsupported right operand
-                # type for in ("Optional[Any]")  [operator]
-                if (
-                    isinstance(col, int)
-                    and col not in self.orig_names  # type: ignore[operator]
-                ):
-                    # pandas\io\parsers.py:2559: error: Value of type
-                    # "Optional[Any]" is not indexable  [index]
-                    col = self.orig_names[col]  # type: ignore[index]
+                if isinstance(col, int) and col not in self.orig_names:
+                    col = self.orig_names[col]
 
                 clean_na_values[col] = na_value
                 clean_na_fvalues[col] = na_fvalue
@@ -349,10 +332,7 @@ def _infer_columns(self):
         names = self.names
         num_original_columns = 0
         clear_buffer = True
-        # pandas\io\parsers.py:2580: error: Need type annotation for
-        # 'unnamed_cols' (hint: "unnamed_cols: Set[<type>] = ...")
-        # [var-annotated]
-        unnamed_cols = set()  # type: ignore[var-annotated]
+        unnamed_cols: Set[Optional[Union[int, str]]] = set()
 
         if self.header is not None:
             header = self.header
@@ -366,9 +346,7 @@ def _infer_columns(self):
                 have_mi_columns = False
                 header = [header]
 
-            # pandas\io\parsers.py:2594: error: Need type annotation for
-            # 'columns' (hint: "columns: List[<type>] = ...")  [var-annotated]
-            columns = []  # type: ignore[var-annotated]
+            columns: List[List[Optional[Union[int, str]]]] = []
             for level, hr in enumerate(header):
                 try:
                     line = self._buffered_line()
@@ -397,7 +375,7 @@ def _infer_columns(self):
 
                     line = self.names[:]
 
-                this_columns = []
+                this_columns: List[Optional[Union[int, str]]] = []
                 this_unnamed_cols = []
 
                 for i, c in enumerate(line):
@@ -413,9 +391,7 @@ def _infer_columns(self):
                         this_columns.append(c)
 
                 if not have_mi_columns and self.mangle_dupe_cols:
-                    # pandas\io\parsers.py:2639: error: Need type annotation
-                    # for 'counts'  [var-annotated]
-                    counts = defaultdict(int)  # type: ignore[var-annotated]
+                    counts: DefaultDict = defaultdict(int)
 
                     for i, col in enumerate(this_columns):
                         cur_count = counts[col]
@@ -439,16 +415,10 @@ def _infer_columns(self):
 
                         if lc != unnamed_count and lc - ic > unnamed_count:
                             clear_buffer = False
-                            # pandas\io\parsers.py:2663: error: List item 0 has
-                            # incompatible type "None"; expected "str"
-                            # [list-item]
-                            this_columns = [None] * lc  # type: ignore[list-item]
+                            this_columns = [None] * lc
                             self.buf = [self.buf[-1]]
 
-                # pandas\io\parsers.py:2666: error: Argument 1 to "append" of
-                # "list" has incompatible type "List[str]"; expected
-                # "List[None]"  [arg-type]
-                columns.append(this_columns)  # type: ignore[arg-type]
+                columns.append(this_columns)
                 unnamed_cols.update({this_columns[i] for i in this_unnamed_cols})
 
                 if len(columns) == 1:
@@ -490,19 +460,9 @@ def _infer_columns(self):
 
             if not names:
                 if self.prefix:
-                    # pandas\io\parsers.py:2711: error: List comprehension has
-                    # incompatible type List[str]; expected List[None]  [misc]
-                    columns = [
-                        [
-                            f"{self.prefix}{i}"  # type: ignore[misc]
-                            for i in range(ncols)
-                        ]
-                    ]
+                    columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
                 else:
-                    # pandas\io\parsers.py:2713: error: Argument 1 to "list"
-                    # has incompatible type "range"; expected "Iterable[None]"
-                    # [arg-type]
-                    columns = [list(range(ncols))]  # type: ignore[arg-type]
+                    columns = [list(range(ncols))]
                 columns = self._handle_usecols(columns, columns[0])
             else:
                 if self.usecols is None or len(names) >= num_original_columns:
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -1,12 +1,11 @@
 """
 Module contains tools for processing files into DataFrames or other objects
 """
-
 from collections import abc
 import csv
 import sys
 from textwrap import fill
-from typing import Any, Dict, Optional, Set, Type
+from typing import Any, Dict, List, Optional, Set, Type
 import warnings
 
 import numpy as np
@@ -727,6 +726,7 @@ def _get_options_with_defaults(self, engine):
         kwds = self.orig_options
 
         options = {}
+        default: Optional[object]
 
         for argname, default in parser_defaults.items():
             value = kwds.get(argname, default)
@@ -756,10 +756,7 @@ def _get_options_with_defaults(self, engine):
             options[argname] = value
 
         if engine == "python-fwf":
-            # pandas\io\parsers.py:907: error: Incompatible types in assignment
-            # (expression has type "object", variable has type "Union[int, str,
-            # None]")  [assignment]
-            for argname, default in _fwf_defaults.items():  # type: ignore[assignment]
+            for argname, default in _fwf_defaults.items():
                 options[argname] = kwds.get(argname, default)
 
         return options
@@ -1053,15 +1050,13 @@ def TextParser(*args, **kwds):
 
 
 def _clean_na_values(na_values, keep_default_na=True):
-
+    na_fvalues: Union[Set, Dict]
     if na_values is None:
         if keep_default_na:
             na_values = STR_NA_VALUES
         else:
             na_values = set()
-        # pandas\io\parsers.py:3387: error: Need type annotation for
-        # 'na_fvalues' (hint: "na_fvalues: Set[<type>] = ...")  [var-annotated]
-        na_fvalues = set()  # type: ignore[var-annotated]
+        na_fvalues = set()
     elif isinstance(na_values, dict):
         old_na_values = na_values.copy()
         na_values = {}  # Prevent aliasing.
@@ -1078,12 +1073,7 @@ def _clean_na_values(na_values, keep_default_na=True):
                 v = set(v) | STR_NA_VALUES
 
             na_values[k] = v
-        # pandas\io\parsers.py:3404: error: Incompatible types in assignment
-        # (expression has type "Dict[Any, Any]", variable has type "Set[Any]")
-        # [assignment]
-        na_fvalues = {  # type: ignore[assignment]
-            k: _floatify_na_values(v) for k, v in na_values.items()
-        }
+        na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()}
     else:
         if not is_list_like(na_values):
             na_values = [na_values]
@@ -1111,7 +1101,7 @@ def _floatify_na_values(na_values):
 
 def _stringify_na_values(na_values):
     """ return a stringified and numeric for these values """
-    result = []
+    result: List[Union[int, str, float]] = []
     for x in na_values:
         result.append(str(x))
         result.append(x)
@@ -1124,15 +1114,11 @@ def _stringify_na_values(na_values):
                 result.append(f"{v}.0")
                 result.append(str(v))
 
-            # pandas\io\parsers.py:3522: error: Argument 1 to "append" of
-            # "list" has incompatible type "float"; expected "str"  [arg-type]
-            result.append(v)  # type: ignore[arg-type]
+            result.append(v)
         except (TypeError, ValueError, OverflowError):
             pass
         try:
-            # pandas\io\parsers.py:3526: error: Argument 1 to "append" of
-            # "list" has incompatible type "int"; expected "str"  [arg-type]
-            result.append(int(x))  # type: ignore[arg-type]
+            result.append(int(x))
         except (TypeError, ValueError, OverflowError):
             pass
     return set(result)