Skip to content

Commit ca4f204

Browse files
authored
TYP: Fix mypy ignores in parsers (#39342)
1 parent 5e8fabb commit ca4f204

File tree

4 files changed

+47 -100 lines changed

4 files changed

+47 -100 lines changed

pandas/io/parsers/base_parser.py

+21 -16
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,19 @@
22
import csv
33
import datetime
44
import itertools
5-
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Union, cast
5+
from typing import (
6+
Any,
7+
DefaultDict,
8+
Dict,
9+
Iterable,
10+
List,
11+
Optional,
12+
Sequence,
13+
Set,
14+
Tuple,
15+
Union,
16+
cast,
17+
)
618
import warnings
719

820
import numpy as np
@@ -335,9 +347,7 @@ def _maybe_dedup_names(self, names):
335347
# would be nice!
336348
if self.mangle_dupe_cols:
337349
names = list(names) # so we can index
338-
# pandas\io\parsers.py:1559: error: Need type annotation for
339-
# 'counts' [var-annotated]
340-
counts = defaultdict(int) # type: ignore[var-annotated]
350+
counts: DefaultDict[Union[int, str, Tuple], int] = defaultdict(int)
341351
is_potential_mi = _is_potential_multi_index(names, self.index_col)
342352

343353
for i, col in enumerate(names):
@@ -382,9 +392,8 @@ def _make_index(self, data, alldata, columns, indexnamerow=False):
382392
# add names for the index
383393
if indexnamerow:
384394
coffset = len(indexnamerow) - len(columns)
385-
# pandas\io\parsers.py:1604: error: Item "None" of "Optional[Any]"
386-
# has no attribute "set_names" [union-attr]
387-
index = index.set_names(indexnamerow[:coffset]) # type: ignore[union-attr]
395+
assert index is not None
396+
index = index.set_names(indexnamerow[:coffset])
388397

389398
# maybe create a mi on the columns
390399
columns = self._maybe_make_multi_index_columns(columns, self.col_names)
@@ -458,9 +467,8 @@ def _agg_index(self, index, try_parse_dates=True) -> Index:
458467
col_na_fvalues = set()
459468

460469
if isinstance(self.na_values, dict):
461-
# pandas\io\parsers.py:1678: error: Value of type
462-
# "Optional[Any]" is not indexable [index]
463-
col_name = self.index_names[i] # type: ignore[index]
470+
assert self.index_names is not None
471+
col_name = self.index_names[i]
464472
if col_name is not None:
465473
col_na_values, col_na_fvalues = _get_na_values(
466474
col_name, self.na_values, self.na_fvalues, self.keep_default_na
@@ -549,7 +557,7 @@ def _convert_to_ndarrays(
549557
return result
550558

551559
def _set_noconvert_dtype_columns(
552-
self, col_indices: List[int], names: List[Union[int, str]]
560+
self, col_indices: List[int], names: List[Union[int, str, Tuple]]
553561
) -> Set[int]:
554562
"""
555563
Set the columns that should not undergo dtype conversions.
@@ -850,7 +858,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
850858
return [None] * len(index_col), columns, index_col
851859

852860
cp_cols = list(columns)
853-
index_names = []
861+
index_names: List[Optional[Union[int, str]]] = []
854862

855863
# don't mutate
856864
index_col = list(index_col)
@@ -871,10 +879,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
871879
# Only clean index names that were placeholders.
872880
for i, name in enumerate(index_names):
873881
if isinstance(name, str) and name in unnamed_cols:
874-
# pandas\io\parsers.py:3445: error: No overload variant of
875-
# "__setitem__" of "list" matches argument types "int", "None"
876-
# [call-overload]
877-
index_names[i] = None # type: ignore[call-overload]
882+
index_names[i] = None
878883

879884
return index_names, columns, index_col
880885

pandas/io/parsers/c_parser_wrapper.py

+2 -6
Original file line number | Diff line number | Diff line change
@@ -131,12 +131,8 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
131131
self.index_names = index_names
132132

133133
if self._reader.header is None and not passed_names:
134-
# pandas\io\parsers.py:1997: error: Argument 1 to "len" has
135-
# incompatible type "Optional[Any]"; expected "Sized"
136-
# [arg-type]
137-
self.index_names = [None] * len(
138-
self.index_names # type: ignore[arg-type]
139-
)
134+
assert self.index_names is not None
135+
self.index_names = [None] * len(self.index_names)
140136

141137
self._implicit_index = self._reader.leading_cols > 0
142138

pandas/io/parsers/python_parser.py

+15 -55
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from io import StringIO
44
import re
55
import sys
6-
from typing import Iterator, List, Optional, Set, cast
6+
from typing import DefaultDict, Iterator, List, Optional, Set, Tuple, cast
77

88
import numpy as np
99

@@ -118,7 +118,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
118118
self.columns = self.columns[0]
119119

120120
# get popped off for index
121-
self.orig_names = list(self.columns)
121+
self.orig_names: List[Union[int, str, Tuple]] = list(self.columns)
122122

123123
# needs to be cleaned/refactored
124124
# multiple date column thing turning into a real spaghetti factory
@@ -236,10 +236,7 @@ def read(self, rows=None):
236236
# done with first read, next time raise StopIteration
237237
self._first_chunk = False
238238

239-
# pandas\io\parsers.py:2480: error: Argument 1 to "list" has
240-
# incompatible type "Optional[Any]"; expected "Iterable[Any]"
241-
# [arg-type]
242-
columns = list(self.orig_names) # type: ignore[arg-type]
239+
columns = list(self.orig_names)
243240
if not len(content): # pragma: no cover
244241
# DataFrame with the right metadata, even though it's length 0
245242
names = self._maybe_dedup_names(self.orig_names)
@@ -292,15 +289,8 @@ def _clean_mapping(mapping):
292289
"""converts col numbers to names"""
293290
clean = {}
294291
for col, v in mapping.items():
295-
# pandas\io\parsers.py:2537: error: Unsupported right operand
296-
# type for in ("Optional[Any]") [operator]
297-
if (
298-
isinstance(col, int)
299-
and col not in self.orig_names # type: ignore[operator]
300-
):
301-
# pandas\io\parsers.py:2538: error: Value of type
302-
# "Optional[Any]" is not indexable [index]
303-
col = self.orig_names[col] # type: ignore[index]
292+
if isinstance(col, int) and col not in self.orig_names:
293+
col = self.orig_names[col]
304294
clean[col] = v
305295
return clean
306296

@@ -320,15 +310,8 @@ def _clean_mapping(mapping):
320310
na_value = self.na_values[col]
321311
na_fvalue = self.na_fvalues[col]
322312

323-
# pandas\io\parsers.py:2558: error: Unsupported right operand
324-
# type for in ("Optional[Any]") [operator]
325-
if (
326-
isinstance(col, int)
327-
and col not in self.orig_names # type: ignore[operator]
328-
):
329-
# pandas\io\parsers.py:2559: error: Value of type
330-
# "Optional[Any]" is not indexable [index]
331-
col = self.orig_names[col] # type: ignore[index]
313+
if isinstance(col, int) and col not in self.orig_names:
314+
col = self.orig_names[col]
332315

333316
clean_na_values[col] = na_value
334317
clean_na_fvalues[col] = na_fvalue
@@ -349,10 +332,7 @@ def _infer_columns(self):
349332
names = self.names
350333
num_original_columns = 0
351334
clear_buffer = True
352-
# pandas\io\parsers.py:2580: error: Need type annotation for
353-
# 'unnamed_cols' (hint: "unnamed_cols: Set[<type>] = ...")
354-
# [var-annotated]
355-
unnamed_cols = set() # type: ignore[var-annotated]
335+
unnamed_cols: Set[Optional[Union[int, str]]] = set()
356336

357337
if self.header is not None:
358338
header = self.header
@@ -366,9 +346,7 @@ def _infer_columns(self):
366346
have_mi_columns = False
367347
header = [header]
368348

369-
# pandas\io\parsers.py:2594: error: Need type annotation for
370-
# 'columns' (hint: "columns: List[<type>] = ...") [var-annotated]
371-
columns = [] # type: ignore[var-annotated]
349+
columns: List[List[Optional[Union[int, str]]]] = []
372350
for level, hr in enumerate(header):
373351
try:
374352
line = self._buffered_line()
@@ -397,7 +375,7 @@ def _infer_columns(self):
397375

398376
line = self.names[:]
399377

400-
this_columns = []
378+
this_columns: List[Optional[Union[int, str]]] = []
401379
this_unnamed_cols = []
402380

403381
for i, c in enumerate(line):
@@ -413,9 +391,7 @@ def _infer_columns(self):
413391
this_columns.append(c)
414392

415393
if not have_mi_columns and self.mangle_dupe_cols:
416-
# pandas\io\parsers.py:2639: error: Need type annotation
417-
# for 'counts' [var-annotated]
418-
counts = defaultdict(int) # type: ignore[var-annotated]
394+
counts: DefaultDict = defaultdict(int)
419395

420396
for i, col in enumerate(this_columns):
421397
cur_count = counts[col]
@@ -439,16 +415,10 @@ def _infer_columns(self):
439415

440416
if lc != unnamed_count and lc - ic > unnamed_count:
441417
clear_buffer = False
442-
# pandas\io\parsers.py:2663: error: List item 0 has
443-
# incompatible type "None"; expected "str"
444-
# [list-item]
445-
this_columns = [None] * lc # type: ignore[list-item]
418+
this_columns = [None] * lc
446419
self.buf = [self.buf[-1]]
447420

448-
# pandas\io\parsers.py:2666: error: Argument 1 to "append" of
449-
# "list" has incompatible type "List[str]"; expected
450-
# "List[None]" [arg-type]
451-
columns.append(this_columns) # type: ignore[arg-type]
421+
columns.append(this_columns)
452422
unnamed_cols.update({this_columns[i] for i in this_unnamed_cols})
453423

454424
if len(columns) == 1:
@@ -490,19 +460,9 @@ def _infer_columns(self):
490460

491461
if not names:
492462
if self.prefix:
493-
# pandas\io\parsers.py:2711: error: List comprehension has
494-
# incompatible type List[str]; expected List[None] [misc]
495-
columns = [
496-
[
497-
f"{self.prefix}{i}" # type: ignore[misc]
498-
for i in range(ncols)
499-
]
500-
]
463+
columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
501464
else:
502-
# pandas\io\parsers.py:2713: error: Argument 1 to "list"
503-
# has incompatible type "range"; expected "Iterable[None]"
504-
# [arg-type]
505-
columns = [list(range(ncols))] # type: ignore[arg-type]
465+
columns = [list(range(ncols))]
506466
columns = self._handle_usecols(columns, columns[0])
507467
else:
508468
if self.usecols is None or len(names) >= num_original_columns:

pandas/io/parsers/readers.py

+9 -23
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,11 @@
11
"""
22
Module contains tools for processing files into DataFrames or other objects
33
"""
4-
54
from collections import abc
65
import csv
76
import sys
87
from textwrap import fill
9-
from typing import Any, Dict, Optional, Set, Type
8+
from typing import Any, Dict, List, Optional, Set, Type
109
import warnings
1110

1211
import numpy as np
@@ -727,6 +726,7 @@ def _get_options_with_defaults(self, engine):
727726
kwds = self.orig_options
728727

729728
options = {}
729+
default: Optional[object]
730730

731731
for argname, default in parser_defaults.items():
732732
value = kwds.get(argname, default)
@@ -756,10 +756,7 @@ def _get_options_with_defaults(self, engine):
756756
options[argname] = value
757757

758758
if engine == "python-fwf":
759-
# pandas\io\parsers.py:907: error: Incompatible types in assignment
760-
# (expression has type "object", variable has type "Union[int, str,
761-
# None]") [assignment]
762-
for argname, default in _fwf_defaults.items(): # type: ignore[assignment]
759+
for argname, default in _fwf_defaults.items():
763760
options[argname] = kwds.get(argname, default)
764761

765762
return options
@@ -1053,15 +1050,13 @@ def TextParser(*args, **kwds):
10531050

10541051

10551052
def _clean_na_values(na_values, keep_default_na=True):
1056-
1053+
na_fvalues: Union[Set, Dict]
10571054
if na_values is None:
10581055
if keep_default_na:
10591056
na_values = STR_NA_VALUES
10601057
else:
10611058
na_values = set()
1062-
# pandas\io\parsers.py:3387: error: Need type annotation for
1063-
# 'na_fvalues' (hint: "na_fvalues: Set[<type>] = ...") [var-annotated]
1064-
na_fvalues = set() # type: ignore[var-annotated]
1059+
na_fvalues = set()
10651060
elif isinstance(na_values, dict):
10661061
old_na_values = na_values.copy()
10671062
na_values = {} # Prevent aliasing.
@@ -1078,12 +1073,7 @@ def _clean_na_values(na_values, keep_default_na=True):
10781073
v = set(v) | STR_NA_VALUES
10791074

10801075
na_values[k] = v
1081-
# pandas\io\parsers.py:3404: error: Incompatible types in assignment
1082-
# (expression has type "Dict[Any, Any]", variable has type "Set[Any]")
1083-
# [assignment]
1084-
na_fvalues = { # type: ignore[assignment]
1085-
k: _floatify_na_values(v) for k, v in na_values.items()
1086-
}
1076+
na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()}
10871077
else:
10881078
if not is_list_like(na_values):
10891079
na_values = [na_values]
@@ -1111,7 +1101,7 @@ def _floatify_na_values(na_values):
11111101

11121102
def _stringify_na_values(na_values):
11131103
""" return a stringified and numeric for these values """
1114-
result = []
1104+
result: List[Union[int, str, float]] = []
11151105
for x in na_values:
11161106
result.append(str(x))
11171107
result.append(x)
@@ -1124,15 +1114,11 @@ def _stringify_na_values(na_values):
11241114
result.append(f"{v}.0")
11251115
result.append(str(v))
11261116

1127-
# pandas\io\parsers.py:3522: error: Argument 1 to "append" of
1128-
# "list" has incompatible type "float"; expected "str" [arg-type]
1129-
result.append(v) # type: ignore[arg-type]
1117+
result.append(v)
11301118
except (TypeError, ValueError, OverflowError):
11311119
pass
11321120
try:
1133-
# pandas\io\parsers.py:3526: error: Argument 1 to "append" of
1134-
# "list" has incompatible type "int"; expected "str" [arg-type]
1135-
result.append(int(x)) # type: ignore[arg-type]
1121+
result.append(int(x))
11361122
except (TypeError, ValueError, OverflowError):
11371123
pass
11381124
return set(result)

0 commit comments

Comments
 (0)