TYP: __future__ annotations in parsers #41620

Merged (1 commit) on May 23, 2021
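The change applied throughout both files is the same: adding from __future__ import annotations at the top of each module postpones evaluation of annotations (PEP 563), so the lower-case built-in generics (PEP 585) and the | union syntax (PEP 604) can be used in annotations even on Python versions older than 3.9/3.10. A minimal sketch of the spelling change, using hypothetical functions rather than code from this PR:

from __future__ import annotations  # must be the first statement in the module

from typing import Dict, List, Optional  # only needed for the old spelling

# Old spelling: typing aliases, still valid but no longer necessary.
def old_style(mapping: Dict[str, int]) -> Optional[List[str]]:
    ...

# New spelling adopted here: built-in generics and "|" unions are accepted on
# Python 3.7+ because the future import keeps the annotations unevaluated.
def new_style(mapping: dict[str, int]) -> list[str] | None:
    ...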
34 changes: 15 additions & 19 deletions pandas/io/parsers/base_parser.py
@@ -1,3 +1,5 @@
from __future__ import annotations

from collections import defaultdict
import csv
import datetime
@@ -6,14 +8,8 @@
Any,
Callable,
DefaultDict,
Dict,
Iterable,
List,
Optional,
Sequence,
Set,
Tuple,
Union,
cast,
)
import warnings
@@ -122,12 +118,12 @@ class ParserBase:
def __init__(self, kwds):

self.names = kwds.get("names")
self.orig_names: Optional[List] = None
self.orig_names: list | None = None
self.prefix = kwds.pop("prefix", None)

self.index_col = kwds.get("index_col", None)
self.unnamed_cols: Set = set()
self.index_names: Optional[List] = None
self.unnamed_cols: set = set()
self.index_names: list | None = None
self.col_names = None

self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
@@ -205,9 +201,9 @@ def __init__(self, kwds):

self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"])

self.handles: Optional[IOHandles] = None
self.handles: IOHandles | None = None

def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None:
def _open_handles(self, src: FilePathOrBuffer, kwds: dict[str, Any]) -> None:
"""
Let the readers open IOHanldes after they are done with their potential raises.
"""
@@ -221,7 +217,7 @@ def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None:
errors=kwds.get("encoding_errors", "strict"),
)

def _validate_parse_dates_presence(self, columns: List[str]) -> None:
def _validate_parse_dates_presence(self, columns: list[str]) -> None:
"""
Check if parse_dates are in columns.

@@ -371,7 +367,7 @@ def _maybe_dedup_names(self, names):
# would be nice!
if self.mangle_dupe_cols:
names = list(names) # so we can index
counts: DefaultDict[Union[int, str, Tuple], int] = defaultdict(int)
counts: DefaultDict[int | str | tuple, int] = defaultdict(int)
is_potential_mi = _is_potential_multi_index(names, self.index_col)

for i, col in enumerate(names):
@@ -596,8 +592,8 @@ def _convert_to_ndarrays(

@final
def _set_noconvert_dtype_columns(
self, col_indices: List[int], names: List[Union[int, str, Tuple]]
) -> Set[int]:
self, col_indices: list[int], names: list[int | str | tuple]
) -> set[int]:
"""
Set the columns that should not undergo dtype conversions.

@@ -615,7 +611,7 @@ def _set_noconvert_dtype_columns(
-------
A set of integers containing the positions of the columns not to convert.
"""
usecols: Optional[Union[List[int], List[str]]]
usecols: list[int] | list[str] | None
noconvert_columns = set()
if self.usecols_dtype == "integer":
# A set of integers will be converted to a list in
@@ -900,7 +896,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
return [None] * len(index_col), columns, index_col

cp_cols = list(columns)
index_names: List[Optional[Union[int, str]]] = []
index_names: list[str | int | None] = []

# don't mutate
index_col = list(index_col)
@@ -926,7 +922,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
return index_names, columns, index_col

def _get_empty_meta(
self, columns, index_col, index_names, dtype: Optional[DtypeArg] = None
self, columns, index_col, index_names, dtype: DtypeArg | None = None
):
columns = list(columns)

@@ -1150,7 +1146,7 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na):


def _is_potential_multi_index(
columns, index_col: Optional[Union[bool, Sequence[int]]] = None
columns, index_col: bool | Sequence[int] | None = None
) -> bool:
"""
Check whether or not the `columns` parameter
30 changes: 14 additions & 16 deletions pandas/io/parsers/python_parser.py
@@ -1,3 +1,5 @@
from __future__ import annotations

from collections import (
abc,
defaultdict,
@@ -9,10 +11,6 @@
from typing import (
DefaultDict,
Iterator,
List,
Optional,
Set,
Tuple,
cast,
)
import warnings
@@ -44,14 +42,14 @@


class PythonParser(ParserBase):
def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
def __init__(self, f: Union[FilePathOrBuffer, list], **kwds):
"""
Workhorse function for processing nested list into DataFrame
"""
ParserBase.__init__(self, kwds)

self.data: Optional[Iterator[str]] = None
self.buf: List = []
self.data: Iterator[str] | None = None
self.buf: list = []
self.pos = 0
self.line_pos = 0

@@ -110,7 +108,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):

# Get columns in two steps: infer from data, then
# infer column indices from self.usecols if it is specified.
self._col_indices: Optional[List[int]] = None
self._col_indices: list[int] | None = None
try:
(
self.columns,
@@ -143,7 +141,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
self.columns = self.columns[0]

# get popped off for index
self.orig_names: List[Union[int, str, Tuple]] = list(self.columns)
self.orig_names: list[int | str | tuple] = list(self.columns)

# needs to be cleaned/refactored
# multiple date column thing turning into a real spaghetti factory
@@ -160,7 +158,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
self._col_indices = list(range(len(self.columns)))

self._validate_parse_dates_presence(self.columns)
no_thousands_columns: Optional[Set[int]] = None
no_thousands_columns: set[int] | None = None
if self.parse_dates:
no_thousands_columns = self._set_noconvert_dtype_columns(
self._col_indices, self.columns
@@ -360,7 +358,7 @@ def _infer_columns(self):
names = self.names
num_original_columns = 0
clear_buffer = True
unnamed_cols: Set[Optional[Union[int, str]]] = set()
unnamed_cols: set[str | int | None] = set()

if self.header is not None:
header = self.header
@@ -374,7 +372,7 @@ def _infer_columns(self):
have_mi_columns = False
header = [header]

columns: List[List[Optional[Union[int, str]]]] = []
columns: list[list[int | str | None]] = []
for level, hr in enumerate(header):
try:
line = self._buffered_line()
@@ -403,7 +401,7 @@ def _infer_columns(self):

line = self.names[:]

this_columns: List[Optional[Union[int, str]]] = []
this_columns: list[int | str | None] = []
this_unnamed_cols = []

for i, c in enumerate(line):
@@ -531,8 +529,8 @@ def _infer_columns(self):

def _handle_usecols(
self,
columns: List[List[Union[Optional[str], Optional[int]]]],
usecols_key: List[Union[Optional[str], Optional[int]]],
columns: list[list[str | int | None]],
usecols_key: list[str | int | None],
num_original_columns: int,
):
"""
@@ -1209,7 +1207,7 @@ def _make_reader(self, f):
self.infer_nrows,
)

def _remove_empty_lines(self, lines) -> List:
def _remove_empty_lines(self, lines) -> list:
"""
Returns the list of lines without the empty ones. With fixed-width
fields, empty lines become arrays of empty strings.
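For reference, the future import also changes what the annotations look like at runtime: they are stored as plain strings and never evaluated, which is why spellings such as bool | Sequence[int] | None are safe before Python 3.10. A small hypothetical example, not part of the pandas code above:

from __future__ import annotations

from typing import Sequence

def pick(columns: Sequence[str], index_col: bool | Sequence[int] | None = None) -> list[str]:
    # The union in the signature is never evaluated; it is kept as a string.
    return list(columns)

print(pick.__annotations__["index_col"])  # 'bool | Sequence[int] | None'
print(pick.__annotations__["return"])     # 'list[str]'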