From 8efbf724b52b62e2b9e1e0539f177f6cf9a1ca22 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 22 May 2021 11:22:46 -0700 Subject: [PATCH] TYP: __future__ annotations in parsers --- pandas/io/parsers/base_parser.py | 34 +++++++++++++----------------- pandas/io/parsers/python_parser.py | 30 ++++++++++++-------------- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index df2bfa7d2a870..02084f91d9966 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from collections import defaultdict import csv import datetime @@ -6,14 +8,8 @@ Any, Callable, DefaultDict, - Dict, Iterable, - List, - Optional, Sequence, - Set, - Tuple, - Union, cast, ) import warnings @@ -122,12 +118,12 @@ class ParserBase: def __init__(self, kwds): self.names = kwds.get("names") - self.orig_names: Optional[List] = None + self.orig_names: list | None = None self.prefix = kwds.pop("prefix", None) self.index_col = kwds.get("index_col", None) - self.unnamed_cols: Set = set() - self.index_names: Optional[List] = None + self.unnamed_cols: set = set() + self.index_names: list | None = None self.col_names = None self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) @@ -205,9 +201,9 @@ def __init__(self, kwds): self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"]) - self.handles: Optional[IOHandles] = None + self.handles: IOHandles | None = None - def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None: + def _open_handles(self, src: FilePathOrBuffer, kwds: dict[str, Any]) -> None: """ Let the readers open IOHanldes after they are done with their potential raises. """ @@ -221,7 +217,7 @@ def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None: errors=kwds.get("encoding_errors", "strict"), ) - def _validate_parse_dates_presence(self, columns: List[str]) -> None: + def _validate_parse_dates_presence(self, columns: list[str]) -> None: """ Check if parse_dates are in columns. @@ -371,7 +367,7 @@ def _maybe_dedup_names(self, names): # would be nice! if self.mangle_dupe_cols: names = list(names) # so we can index - counts: DefaultDict[Union[int, str, Tuple], int] = defaultdict(int) + counts: DefaultDict[int | str | tuple, int] = defaultdict(int) is_potential_mi = _is_potential_multi_index(names, self.index_col) for i, col in enumerate(names): @@ -596,8 +592,8 @@ def _convert_to_ndarrays( @final def _set_noconvert_dtype_columns( - self, col_indices: List[int], names: List[Union[int, str, Tuple]] - ) -> Set[int]: + self, col_indices: list[int], names: list[int | str | tuple] + ) -> set[int]: """ Set the columns that should not undergo dtype conversions. @@ -615,7 +611,7 @@ def _set_noconvert_dtype_columns( ------- A set of integers containing the positions of the columns not to convert. """ - usecols: Optional[Union[List[int], List[str]]] + usecols: list[int] | list[str] | None noconvert_columns = set() if self.usecols_dtype == "integer": # A set of integers will be converted to a list in @@ -900,7 +896,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols): return [None] * len(index_col), columns, index_col cp_cols = list(columns) - index_names: List[Optional[Union[int, str]]] = [] + index_names: list[str | int | None] = [] # don't mutate index_col = list(index_col) @@ -926,7 +922,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols): return index_names, columns, index_col def _get_empty_meta( - self, columns, index_col, index_names, dtype: Optional[DtypeArg] = None + self, columns, index_col, index_names, dtype: DtypeArg | None = None ): columns = list(columns) @@ -1150,7 +1146,7 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na): def _is_potential_multi_index( - columns, index_col: Optional[Union[bool, Sequence[int]]] = None + columns, index_col: bool | Sequence[int] | None = None ) -> bool: """ Check whether or not the `columns` parameter diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 9082e41698913..8f6d95f001d91 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from collections import ( abc, defaultdict, @@ -9,10 +11,6 @@ from typing import ( DefaultDict, Iterator, - List, - Optional, - Set, - Tuple, cast, ) import warnings @@ -44,14 +42,14 @@ class PythonParser(ParserBase): - def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): + def __init__(self, f: Union[FilePathOrBuffer, list], **kwds): """ Workhorse function for processing nested list into DataFrame """ ParserBase.__init__(self, kwds) - self.data: Optional[Iterator[str]] = None - self.buf: List = [] + self.data: Iterator[str] | None = None + self.buf: list = [] self.pos = 0 self.line_pos = 0 @@ -110,7 +108,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. - self._col_indices: Optional[List[int]] = None + self._col_indices: list[int] | None = None try: ( self.columns, @@ -143,7 +141,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): self.columns = self.columns[0] # get popped off for index - self.orig_names: List[Union[int, str, Tuple]] = list(self.columns) + self.orig_names: list[int | str | tuple] = list(self.columns) # needs to be cleaned/refactored # multiple date column thing turning into a real spaghetti factory @@ -160,7 +158,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): self._col_indices = list(range(len(self.columns))) self._validate_parse_dates_presence(self.columns) - no_thousands_columns: Optional[Set[int]] = None + no_thousands_columns: set[int] | None = None if self.parse_dates: no_thousands_columns = self._set_noconvert_dtype_columns( self._col_indices, self.columns @@ -360,7 +358,7 @@ def _infer_columns(self): names = self.names num_original_columns = 0 clear_buffer = True - unnamed_cols: Set[Optional[Union[int, str]]] = set() + unnamed_cols: set[str | int | None] = set() if self.header is not None: header = self.header @@ -374,7 +372,7 @@ def _infer_columns(self): have_mi_columns = False header = [header] - columns: List[List[Optional[Union[int, str]]]] = [] + columns: list[list[int | str | None]] = [] for level, hr in enumerate(header): try: line = self._buffered_line() @@ -403,7 +401,7 @@ def _infer_columns(self): line = self.names[:] - this_columns: List[Optional[Union[int, str]]] = [] + this_columns: list[int | str | None] = [] this_unnamed_cols = [] for i, c in enumerate(line): @@ -531,8 +529,8 @@ def _infer_columns(self): def _handle_usecols( self, - columns: List[List[Union[Optional[str], Optional[int]]]], - usecols_key: List[Union[Optional[str], Optional[int]]], + columns: list[list[str | int | None]], + usecols_key: list[str | int | None], num_original_columns: int, ): """ @@ -1209,7 +1207,7 @@ def _make_reader(self, f): self.infer_nrows, ) - def _remove_empty_lines(self, lines) -> List: + def _remove_empty_lines(self, lines) -> list: """ Returns the list of lines without the empty ones. With fixed-width fields, empty lines become arrays of empty strings.