
TYP: Fix mypy ignores in parsers #39342

Merged · 6 commits · Feb 4, 2021
Changes from 3 commits
40 changes: 23 additions & 17 deletions pandas/io/parsers/base_parser.py
@@ -1,8 +1,21 @@
from __future__ import annotations

from collections import defaultdict
import csv
import datetime
import itertools
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Union, cast
from typing import (
Any,
DefaultDict,
Dict,
Iterable,
List,
Optional,
Sequence,
Set,
Union,
cast,
)
import warnings

import numpy as np
@@ -335,9 +348,7 @@ def _maybe_dedup_names(self, names):
# would be nice!
if self.mangle_dupe_cols:
names = list(names) # so we can index
# pandas\io\parsers.py:1559: error: Need type annotation for
# 'counts' [var-annotated]
counts = defaultdict(int) # type: ignore[var-annotated]
counts: DefaultDict[int | str, int] = defaultdict(int)
Member: if an MI (MultiIndex), the dict key is a tuple?

Member Author: You are right, thanks. I looked through them again and fixed two more, I think.
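
For context on the point above: when the header is a MultiIndex, the parsed column names are tuples of labels rather than plain strings, so the `counts` mapping has to accept tuples as keys. A minimal sketch of that situation (illustrative only; the `Hashable` annotation is an assumption here, not necessarily the exact annotation used in the later commits):

```python
from collections import defaultdict
from typing import DefaultDict, Hashable

# With a single header row the names are strings; with a MultiIndex
# header each name is a tuple of labels, e.g. ("a", "x").
names = [("a", "x"), ("a", "y"), ("a", "x")]

# Keying on Hashable covers both the str and the tuple case.
counts: DefaultDict[Hashable, int] = defaultdict(int)
for col in names:
    counts[col] += 1

print(counts[("a", "x")])  # 2
```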

is_potential_mi = _is_potential_multi_index(names, self.index_col)

for i, col in enumerate(names):
@@ -382,9 +393,8 @@ def _make_index(self, data, alldata, columns, indexnamerow=False):
# add names for the index
if indexnamerow:
coffset = len(indexnamerow) - len(columns)
# pandas\io\parsers.py:1604: error: Item "None" of "Optional[Any]"
# has no attribute "set_names" [union-attr]
index = index.set_names(indexnamerow[:coffset]) # type: ignore[union-attr]
assert index is not None
index = index.set_names(indexnamerow[:coffset])

# maybe create a mi on the columns
columns = self._maybe_make_multi_index_columns(columns, self.col_names)
@@ -458,9 +468,8 @@ def _agg_index(self, index, try_parse_dates=True) -> Index:
col_na_fvalues = set()

if isinstance(self.na_values, dict):
# pandas\io\parsers.py:1678: error: Value of type
# "Optional[Any]" is not indexable [index]
col_name = self.index_names[i] # type: ignore[index]
assert self.index_names is not None
col_name = self.index_names[i]
if col_name is not None:
col_na_values, col_na_fvalues = _get_na_values(
col_name, self.na_values, self.na_fvalues, self.keep_default_na
@@ -549,7 +558,7 @@ def _convert_to_ndarrays(
return result

def _set_noconvert_dtype_columns(
self, col_indices: List[int], names: List[Union[int, str]]
self, col_indices: List[int], names: List[int | str]
) -> Set[int]:
"""
Set the columns that should not undergo dtype conversions.
Expand All @@ -568,7 +577,7 @@ def _set_noconvert_dtype_columns(
-------
A set of integers containing the positions of the columns not to convert.
"""
usecols: Optional[Union[List[int], List[str]]]
usecols: List[int] | List[str] | None
noconvert_columns = set()
if self.usecols_dtype == "integer":
# A set of integers will be converted to a list in
@@ -850,7 +859,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
return [None] * len(index_col), columns, index_col

cp_cols = list(columns)
index_names = []
index_names: List[int | str | None] = []

# don't mutate
index_col = list(index_col)
Expand All @@ -871,10 +880,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
# Only clean index names that were placeholders.
for i, name in enumerate(index_names):
if isinstance(name, str) and name in unnamed_cols:
# pandas\io\parsers.py:3445: error: No overload variant of
# "__setitem__" of "list" matches argument types "int", "None"
# [call-overload]
index_names[i] = None # type: ignore[call-overload]
index_names[i] = None

return index_names, columns, index_col

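The pattern running through base_parser.py above is that each `# type: ignore[union-attr]`, `[index]`, or `[arg-type]` comment is replaced by an `assert ... is not None`, which mypy uses to narrow the `Optional` type before the attribute access, indexing, or `len()` call. A minimal standalone sketch of that narrowing, with hypothetical names rather than pandas code:

```python
from typing import List, Optional


def expand_names(index_names: Optional[List[str]]) -> List[Optional[str]]:
    # Without the assert, mypy reports that len() may receive None
    # (arg-type); the assert narrows Optional[List[str]] to List[str].
    assert index_names is not None
    return [None] * len(index_names)


print(expand_names(["a", "b"]))  # [None, None]
```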
8 changes: 2 additions & 6 deletions pandas/io/parsers/c_parser_wrapper.py
@@ -131,12 +131,8 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
self.index_names = index_names

if self._reader.header is None and not passed_names:
# pandas\io\parsers.py:1997: error: Argument 1 to "len" has
# incompatible type "Optional[Any]"; expected "Sized"
# [arg-type]
self.index_names = [None] * len(
self.index_names # type: ignore[arg-type]
)
assert self.index_names is not None
self.index_names = [None] * len(self.index_names)

self._implicit_index = self._reader.leading_cols > 0

72 changes: 17 additions & 55 deletions pandas/io/parsers/python_parser.py
@@ -1,9 +1,11 @@
from __future__ import annotations

from collections import abc, defaultdict
import csv
from io import StringIO
import re
import sys
from typing import Iterator, List, Optional, Set, cast
from typing import DefaultDict, Iterator, List, Optional, Set, cast

import numpy as np

@@ -118,7 +120,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
self.columns = self.columns[0]

# get popped off for index
self.orig_names = list(self.columns)
self.orig_names: List[int | str] = list(self.columns)

# needs to be cleaned/refactored
# multiple date column thing turning into a real spaghetti factory
@@ -236,10 +238,7 @@ def read(self, rows=None):
# done with first read, next time raise StopIteration
self._first_chunk = False

# pandas\io\parsers.py:2480: error: Argument 1 to "list" has
# incompatible type "Optional[Any]"; expected "Iterable[Any]"
# [arg-type]
columns = list(self.orig_names) # type: ignore[arg-type]
columns = list(self.orig_names)
if not len(content): # pragma: no cover
# DataFrame with the right metadata, even though it's length 0
names = self._maybe_dedup_names(self.orig_names)
@@ -292,15 +291,8 @@ def _clean_mapping(mapping):
"""converts col numbers to names"""
clean = {}
for col, v in mapping.items():
# pandas\io\parsers.py:2537: error: Unsupported right operand
# type for in ("Optional[Any]") [operator]
if (
isinstance(col, int)
and col not in self.orig_names # type: ignore[operator]
):
# pandas\io\parsers.py:2538: error: Value of type
# "Optional[Any]" is not indexable [index]
col = self.orig_names[col] # type: ignore[index]
if isinstance(col, int) and col not in self.orig_names:
col = self.orig_names[col]
clean[col] = v
return clean

Expand All @@ -320,15 +312,8 @@ def _clean_mapping(mapping):
na_value = self.na_values[col]
na_fvalue = self.na_fvalues[col]

# pandas\io\parsers.py:2558: error: Unsupported right operand
# type for in ("Optional[Any]") [operator]
if (
isinstance(col, int)
and col not in self.orig_names # type: ignore[operator]
):
# pandas\io\parsers.py:2559: error: Value of type
# "Optional[Any]" is not indexable [index]
col = self.orig_names[col] # type: ignore[index]
if isinstance(col, int) and col not in self.orig_names:
col = self.orig_names[col]

clean_na_values[col] = na_value
clean_na_fvalues[col] = na_fvalue
Expand All @@ -349,10 +334,7 @@ def _infer_columns(self):
names = self.names
num_original_columns = 0
clear_buffer = True
# pandas\io\parsers.py:2580: error: Need type annotation for
# 'unnamed_cols' (hint: "unnamed_cols: Set[<type>] = ...")
# [var-annotated]
unnamed_cols = set() # type: ignore[var-annotated]
unnamed_cols: Set[int | str | None] = set()

if self.header is not None:
header = self.header
Expand All @@ -366,9 +348,7 @@ def _infer_columns(self):
have_mi_columns = False
header = [header]

# pandas\io\parsers.py:2594: error: Need type annotation for
# 'columns' (hint: "columns: List[<type>] = ...") [var-annotated]
columns = [] # type: ignore[var-annotated]
columns: List[List[int | str | None]] = []
for level, hr in enumerate(header):
try:
line = self._buffered_line()
@@ -397,7 +377,7 @@

line = self.names[:]

this_columns = []
this_columns: List[int | str | None] = []
this_unnamed_cols = []

for i, c in enumerate(line):
Expand All @@ -413,9 +393,7 @@ def _infer_columns(self):
this_columns.append(c)

if not have_mi_columns and self.mangle_dupe_cols:
# pandas\io\parsers.py:2639: error: Need type annotation
# for 'counts' [var-annotated]
counts = defaultdict(int) # type: ignore[var-annotated]
counts: DefaultDict = defaultdict(int)

for i, col in enumerate(this_columns):
cur_count = counts[col]
Expand All @@ -439,16 +417,10 @@ def _infer_columns(self):

if lc != unnamed_count and lc - ic > unnamed_count:
clear_buffer = False
# pandas\io\parsers.py:2663: error: List item 0 has
# incompatible type "None"; expected "str"
# [list-item]
this_columns = [None] * lc # type: ignore[list-item]
this_columns = [None] * lc
self.buf = [self.buf[-1]]

# pandas\io\parsers.py:2666: error: Argument 1 to "append" of
# "list" has incompatible type "List[str]"; expected
# "List[None]" [arg-type]
columns.append(this_columns) # type: ignore[arg-type]
columns.append(this_columns)
unnamed_cols.update({this_columns[i] for i in this_unnamed_cols})

if len(columns) == 1:
@@ -490,19 +462,9 @@

if not names:
if self.prefix:
# pandas\io\parsers.py:2711: error: List comprehension has
# incompatible type List[str]; expected List[None] [misc]
columns = [
[
f"{self.prefix}{i}" # type: ignore[misc]
for i in range(ncols)
]
]
columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
else:
# pandas\io\parsers.py:2713: error: Argument 1 to "list"
# has incompatible type "range"; expected "Iterable[None]"
# [arg-type]
columns = [list(range(ncols))] # type: ignore[arg-type]
columns = [list(range(ncols))]
columns = self._handle_usecols(columns, columns[0])
else:
if self.usecols is None or len(names) >= num_original_columns:
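Several of the `var-annotated` ignores removed in python_parser.py are resolved simply by annotating the empty container, and the `from __future__ import annotations` line added at the top of each module lets those annotations use the PEP 604 `int | str` union syntax even on Python versions before 3.10, because annotations are no longer evaluated at runtime. A small sketch under those assumptions (variable names borrowed for illustration, not a diff excerpt):

```python
from __future__ import annotations

from typing import List, Set

# Annotating the empty containers up front removes the need for
# `# type: ignore[var-annotated]` when mypy cannot infer an element type.
unnamed_cols: Set[int | str | None] = set()
this_columns: List[int | str | None] = []

this_columns.extend(["a", 0, None])
unnamed_cols.update(this_columns)
print(sorted(unnamed_cols, key=str))  # [0, None, 'a']
```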
32 changes: 10 additions & 22 deletions pandas/io/parsers/readers.py
@@ -1,12 +1,13 @@
"""
Module contains tools for processing files into DataFrames or other objects
"""
from __future__ import annotations

from collections import abc
import csv
import sys
from textwrap import fill
from typing import Any, Dict, Optional, Set, Type
from typing import Any, Dict, List, Optional, Set, Type
import warnings

import numpy as np
@@ -722,6 +723,7 @@ def _get_options_with_defaults(self, engine):
kwds = self.orig_options

options = {}
default: Optional[object]

for argname, default in parser_defaults.items():
value = kwds.get(argname, default)
@@ -751,10 +753,7 @@ def _get_options_with_defaults(self, engine):
options[argname] = value

if engine == "python-fwf":
# pandas\io\parsers.py:907: error: Incompatible types in assignment
# (expression has type "object", variable has type "Union[int, str,
# None]") [assignment]
for argname, default in _fwf_defaults.items(): # type: ignore[assignment]
for argname, default in _fwf_defaults.items():
options[argname] = kwds.get(argname, default)

return options
@@ -1048,15 +1047,13 @@ def TextParser(*args, **kwds):


def _clean_na_values(na_values, keep_default_na=True):

na_fvalues: Set | Dict
if na_values is None:
if keep_default_na:
na_values = STR_NA_VALUES
else:
na_values = set()
# pandas\io\parsers.py:3387: error: Need type annotation for
# 'na_fvalues' (hint: "na_fvalues: Set[<type>] = ...") [var-annotated]
na_fvalues = set() # type: ignore[var-annotated]
na_fvalues = set()
elif isinstance(na_values, dict):
old_na_values = na_values.copy()
na_values = {} # Prevent aliasing.
Expand All @@ -1073,12 +1070,7 @@ def _clean_na_values(na_values, keep_default_na=True):
v = set(v) | STR_NA_VALUES

na_values[k] = v
# pandas\io\parsers.py:3404: error: Incompatible types in assignment
# (expression has type "Dict[Any, Any]", variable has type "Set[Any]")
# [assignment]
na_fvalues = { # type: ignore[assignment]
k: _floatify_na_values(v) for k, v in na_values.items()
}
na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()}
else:
if not is_list_like(na_values):
na_values = [na_values]
@@ -1106,7 +1098,7 @@ def _floatify_na_values(na_values):

def _stringify_na_values(na_values):
""" return a stringified and numeric for these values """
result = []
result: List[int | str | float] = []
for x in na_values:
result.append(str(x))
result.append(x)
Expand All @@ -1119,15 +1111,11 @@ def _stringify_na_values(na_values):
result.append(f"{v}.0")
result.append(str(v))

# pandas\io\parsers.py:3522: error: Argument 1 to "append" of
# "list" has incompatible type "float"; expected "str" [arg-type]
result.append(v) # type: ignore[arg-type]
result.append(v)
except (TypeError, ValueError, OverflowError):
pass
try:
# pandas\io\parsers.py:3526: error: Argument 1 to "append" of
# "list" has incompatible type "int"; expected "str" [arg-type]
result.append(int(x)) # type: ignore[arg-type]
result.append(int(x))
except (TypeError, ValueError, OverflowError):
pass
return set(result)
Expand Down