TYP: __future__ annotations in parsers (pandas-dev#41620)

jbrockmendel · TLouf · commit 42eaf6e74524 · 2021-06-01T18:01:59.000+02:00
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from collections import defaultdict
 import csv
 import datetime
@@ -6,14 +8,8 @@
     Any,
     Callable,
     DefaultDict,
-    Dict,
     Iterable,
-    List,
-    Optional,
     Sequence,
-    Set,
-    Tuple,
-    Union,
     cast,
 )
 import warnings
@@ -122,12 +118,12 @@ class ParserBase:
     def __init__(self, kwds):
 
         self.names = kwds.get("names")
-        self.orig_names: Optional[List] = None
+        self.orig_names: list | None = None
         self.prefix = kwds.pop("prefix", None)
 
         self.index_col = kwds.get("index_col", None)
-        self.unnamed_cols: Set = set()
-        self.index_names: Optional[List] = None
+        self.unnamed_cols: set = set()
+        self.index_names: list | None = None
         self.col_names = None
 
         self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
@@ -205,9 +201,9 @@ def __init__(self, kwds):
 
         self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"])
 
-        self.handles: Optional[IOHandles] = None
+        self.handles: IOHandles | None = None
 
-    def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None:
+    def _open_handles(self, src: FilePathOrBuffer, kwds: dict[str, Any]) -> None:
         """
         Let the readers open IOHanldes after they are done with their potential raises.
         """
@@ -221,7 +217,7 @@ def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None:
             errors=kwds.get("encoding_errors", "strict"),
         )
 
-    def _validate_parse_dates_presence(self, columns: List[str]) -> None:
+    def _validate_parse_dates_presence(self, columns: list[str]) -> None:
         """
         Check if parse_dates are in columns.
 
@@ -371,7 +367,7 @@ def _maybe_dedup_names(self, names):
         # would be nice!
         if self.mangle_dupe_cols:
             names = list(names)  # so we can index
-            counts: DefaultDict[Union[int, str, Tuple], int] = defaultdict(int)
+            counts: DefaultDict[int | str | tuple, int] = defaultdict(int)
             is_potential_mi = _is_potential_multi_index(names, self.index_col)
 
             for i, col in enumerate(names):
@@ -596,8 +592,8 @@ def _convert_to_ndarrays(
 
     @final
     def _set_noconvert_dtype_columns(
-        self, col_indices: List[int], names: List[Union[int, str, Tuple]]
-    ) -> Set[int]:
+        self, col_indices: list[int], names: list[int | str | tuple]
+    ) -> set[int]:
         """
         Set the columns that should not undergo dtype conversions.
 
@@ -615,7 +611,7 @@ def _set_noconvert_dtype_columns(
         -------
         A set of integers containing the positions of the columns not to convert.
         """
-        usecols: Optional[Union[List[int], List[str]]]
+        usecols: list[int] | list[str] | None
         noconvert_columns = set()
         if self.usecols_dtype == "integer":
             # A set of integers will be converted to a list in
@@ -900,7 +896,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
             return [None] * len(index_col), columns, index_col
 
         cp_cols = list(columns)
-        index_names: List[Optional[Union[int, str]]] = []
+        index_names: list[str | int | None] = []
 
         # don't mutate
         index_col = list(index_col)
@@ -926,7 +922,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
         return index_names, columns, index_col
 
     def _get_empty_meta(
-        self, columns, index_col, index_names, dtype: Optional[DtypeArg] = None
+        self, columns, index_col, index_names, dtype: DtypeArg | None = None
     ):
         columns = list(columns)
 
@@ -1150,7 +1146,7 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na):
 
 
 def _is_potential_multi_index(
-    columns, index_col: Optional[Union[bool, Sequence[int]]] = None
+    columns, index_col: bool | Sequence[int] | None = None
 ) -> bool:
     """
     Check whether or not the `columns` parameter
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from collections import (
     abc,
     defaultdict,
@@ -9,10 +11,6 @@
 from typing import (
     DefaultDict,
     Iterator,
-    List,
-    Optional,
-    Set,
-    Tuple,
     cast,
 )
 import warnings
@@ -44,14 +42,14 @@
 
 
 class PythonParser(ParserBase):
-    def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
+    def __init__(self, f: Union[FilePathOrBuffer, list], **kwds):
         """
         Workhorse function for processing nested list into DataFrame
         """
         ParserBase.__init__(self, kwds)
 
-        self.data: Optional[Iterator[str]] = None
-        self.buf: List = []
+        self.data: Iterator[str] | None = None
+        self.buf: list = []
         self.pos = 0
         self.line_pos = 0
 
@@ -110,7 +108,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
 
         # Get columns in two steps: infer from data, then
         # infer column indices from self.usecols if it is specified.
-        self._col_indices: Optional[List[int]] = None
+        self._col_indices: list[int] | None = None
         try:
             (
                 self.columns,
@@ -143,7 +141,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             self.columns = self.columns[0]
 
         # get popped off for index
-        self.orig_names: List[Union[int, str, Tuple]] = list(self.columns)
+        self.orig_names: list[int | str | tuple] = list(self.columns)
 
         # needs to be cleaned/refactored
         # multiple date column thing turning into a real spaghetti factory
@@ -160,7 +158,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
             self._col_indices = list(range(len(self.columns)))
 
         self._validate_parse_dates_presence(self.columns)
-        no_thousands_columns: Optional[Set[int]] = None
+        no_thousands_columns: set[int] | None = None
         if self.parse_dates:
             no_thousands_columns = self._set_noconvert_dtype_columns(
                 self._col_indices, self.columns
@@ -360,7 +358,7 @@ def _infer_columns(self):
         names = self.names
         num_original_columns = 0
         clear_buffer = True
-        unnamed_cols: Set[Optional[Union[int, str]]] = set()
+        unnamed_cols: set[str | int | None] = set()
 
         if self.header is not None:
             header = self.header
@@ -374,7 +372,7 @@ def _infer_columns(self):
                 have_mi_columns = False
                 header = [header]
 
-            columns: List[List[Optional[Union[int, str]]]] = []
+            columns: list[list[int | str | None]] = []
             for level, hr in enumerate(header):
                 try:
                     line = self._buffered_line()
@@ -403,7 +401,7 @@ def _infer_columns(self):
 
                     line = self.names[:]
 
-                this_columns: List[Optional[Union[int, str]]] = []
+                this_columns: list[int | str | None] = []
                 this_unnamed_cols = []
 
                 for i, c in enumerate(line):
@@ -531,8 +529,8 @@ def _infer_columns(self):
 
     def _handle_usecols(
         self,
-        columns: List[List[Union[Optional[str], Optional[int]]]],
-        usecols_key: List[Union[Optional[str], Optional[int]]],
+        columns: list[list[str | int | None]],
+        usecols_key: list[str | int | None],
         num_original_columns: int,
     ):
         """
@@ -1209,7 +1207,7 @@ def _make_reader(self, f):
             self.infer_nrows,
         )
 
-    def _remove_empty_lines(self, lines) -> List:
+    def _remove_empty_lines(self, lines) -> list:
         """
         Returns the list of lines without the empty ones. With fixed-width
         fields, empty lines become arrays of empty strings.