Skip to content

Commit d90a3a6

Browse files
committed
TYP: io broken off from pandas-dev#41059
1 parent 1a20d13 commit d90a3a6

File tree

3 files changed

+66
-46
lines changed

3 files changed

+66
-46
lines changed

pandas/io/parsers/base_parser.py

+40-10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
Any,
99
Callable,
1010
DefaultDict,
11+
Hashable,
1112
Iterable,
1213
Sequence,
1314
cast,
@@ -114,6 +115,8 @@
114115
class ParserBase:
115116
_implicit_index: bool = False
116117
_first_chunk: bool
118+
index_col: int | Sequence[int] | None
119+
index_names: list[Hashable] | None
117120

118121
def __init__(self, kwds):
119122

@@ -123,7 +126,7 @@ def __init__(self, kwds):
123126

124127
self.index_col = kwds.get("index_col", None)
125128
self.unnamed_cols: set = set()
126-
self.index_names: list | None = None
129+
self.index_names: list[Hashable] | None = None
127130
self.col_names = None
128131

129132
self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
@@ -171,8 +174,14 @@ def __init__(self, kwds):
171174
if self.index_col is not None:
172175
is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray))
173176
if not (
174-
is_sequence
175-
and all(map(is_integer, self.index_col))
177+
# error: Argument 2 to "map" has incompatible type
178+
# "Union[int, Sequence[int]]"; expected "Iterable[int]"
179+
(
180+
is_sequence
181+
and all(
182+
map(is_integer, self.index_col) # type: ignore[arg-type]
183+
)
184+
)
176185
or is_integer(self.index_col)
177186
):
178187
raise ValueError(
@@ -287,8 +296,12 @@ def _should_parse_dates(self, i: int) -> bool:
287296
name = self.index_names[i]
288297
else:
289298
name = None
290-
j = i if self.index_col is None else self.index_col[i]
291-
299+
# error: Value of type "Union[int, Sequence[int]]" is not indexable
300+
j = (
301+
i
302+
if self.index_col is None
303+
else self.index_col[i] # type: ignore[index]
304+
)
292305
if is_scalar(self.parse_dates):
293306
return (j == self.parse_dates) or (
294307
name is not None and name == self.parse_dates
@@ -317,7 +330,9 @@ def _extract_multi_indexer_columns(
317330
ic = []
318331

319332
if not isinstance(ic, (list, tuple, np.ndarray)):
320-
ic = [ic]
333+
# error: List item 0 has incompatible type
334+
# "Union[int, Sequence[int]]"; expected "int"
335+
ic = [ic] # type: ignore[list-item]
321336
sic = set(ic)
322337

323338
# clean the index_names
@@ -333,7 +348,9 @@ def extract(r):
333348
return tuple(r[i] for i in range(field_count) if i not in sic)
334349

335350
columns = list(zip(*(extract(r) for r in header)))
336-
names = ic + columns
351+
# error: No overload variant of "__add__" of "tuple" matches argument
352+
# type "List[Any]"
353+
names = ic + columns # type: ignore[operator]
337354

338355
# If we find unnamed columns all in a single
339356
# level, then our header was too long.
@@ -368,7 +385,12 @@ def _maybe_dedup_names(self, names):
368385
if self.mangle_dupe_cols:
369386
names = list(names) # so we can index
370387
counts: DefaultDict[int | str | tuple, int] = defaultdict(int)
371-
is_potential_mi = _is_potential_multi_index(names, self.index_col)
388+
# error: Argument 2 to "_is_potential_multi_index" has incompatible
389+
# type "Union[int, Sequence[int], None]"; expected
390+
# "Union[bool, Sequence[int], None]"
391+
is_potential_mi = _is_potential_multi_index(
392+
names, self.index_col # type: ignore[arg-type]
393+
)
372394

373395
for i, col in enumerate(names):
374396
cur_count = counts[col]
@@ -431,7 +453,11 @@ def ix(col):
431453

432454
to_remove = []
433455
index = []
434-
for idx in self.index_col:
456+
# error: Item "int" of "Union[int, Sequence[int], None]" has no
457+
# attribute "__iter__" (not iterable)
458+
# error: Item "None" of "Union[int, Sequence[int], None]" has no
459+
# attribute "__iter__" (not iterable)
460+
for idx in self.index_col: # type: ignore[union-attr]
435461
i = ix(idx)
436462
to_remove.append(i)
437463
index.append(data[i])
@@ -460,7 +486,11 @@ def _get_name(icol):
460486

461487
to_remove = []
462488
index = []
463-
for idx in self.index_col:
489+
# error: Item "int" of "Union[int, Sequence[int], None]" has no
490+
# attribute "__iter__" (not iterable)
491+
# error: Item "None" of "Union[int, Sequence[int], None]" has no
492+
# attribute "__iter__" (not iterable)
493+
for idx in self.index_col: # type: ignore[union-attr]
464494
name = _get_name(idx)
465495
to_remove.append(name)
466496
index.append(data[name])

pandas/io/parsers/c_parser_wrapper.py

+6-13
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
3939
self.low_memory = kwds.pop("low_memory", False)
4040

4141
# #2442
42-
# error: Cannot determine type of 'index_col'
43-
kwds["allow_leading_cols"] = (
44-
self.index_col is not False # type: ignore[has-type]
45-
)
42+
kwds["allow_leading_cols"] = self.index_col is not False
4643

4744
# GH20529, validate usecol arg before TextReader
4845
kwds["usecols"] = self.usecols
@@ -71,7 +68,6 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
7168
if len(self._reader.header) > 1:
7269
# we have a multi index in the columns
7370
# error: Cannot determine type of 'names'
74-
# error: Cannot determine type of 'index_names'
7571
# error: Cannot determine type of 'col_names'
7672
(
7773
self.names, # type: ignore[has-type]
@@ -80,7 +76,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
8076
passed_names,
8177
) = self._extract_multi_indexer_columns(
8278
self._reader.header,
83-
self.index_names, # type: ignore[has-type]
79+
self.index_names,
8480
self.col_names, # type: ignore[has-type]
8581
passed_names,
8682
)
@@ -149,10 +145,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
149145
self.orig_names = self.names # type: ignore[has-type]
150146

151147
if not self._has_complex_date_col:
152-
# error: Cannot determine type of 'index_col'
153-
if self._reader.leading_cols == 0 and is_index_col(
154-
self.index_col # type: ignore[has-type]
155-
):
148+
if self._reader.leading_cols == 0 and is_index_col(self.index_col):
156149

157150
self._name_processed = True
158151
(
@@ -163,8 +156,7 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
163156
) = self._clean_index_names(
164157
# error: Cannot determine type of 'names'
165158
self.names, # type: ignore[has-type]
166-
# error: Cannot determine type of 'index_col'
167-
self.index_col, # type: ignore[has-type]
159+
self.index_col,
168160
self.unnamed_cols,
169161
)
170162

@@ -258,7 +250,8 @@ def read(self, nrows=None):
258250
if self.index_col is None:
259251
values = data.pop(i)
260252
else:
261-
values = data.pop(self.index_col[i])
253+
# error: Value of type "Union[int, Sequence[int]]" is not indexable
254+
values = data.pop(self.index_col[i]) # type: ignore[index]
262255

263256
values = self._maybe_parse_dates(values, i, try_parse_dates=True)
264257
arrays.append(values)

pandas/io/parsers/python_parser.py

+20-23
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ def __init__(self, f: Union[FilePathOrBuffer, list], **kwds):
123123
# The original set is stored in self.original_columns.
124124
if len(self.columns) > 1:
125125
# we are processing a multi index column
126-
# error: Cannot determine type of 'index_names'
127126
# error: Cannot determine type of 'col_names'
128127
(
129128
self.columns,
@@ -132,7 +131,7 @@ def __init__(self, f: Union[FilePathOrBuffer, list], **kwds):
132131
_,
133132
) = self._extract_multi_indexer_columns(
134133
self.columns,
135-
self.index_names, # type: ignore[has-type]
134+
self.index_names,
136135
self.col_names, # type: ignore[has-type]
137136
)
138137
# Update list of original names to include all indices.
@@ -262,10 +261,9 @@ def read(self, rows=None):
262261
if not len(content): # pragma: no cover
263262
# DataFrame with the right metadata, even though it's length 0
264263
names = self._maybe_dedup_names(self.orig_names)
265-
# error: Cannot determine type of 'index_col'
266264
index, columns, col_dict = self._get_empty_meta(
267265
names,
268-
self.index_col, # type: ignore[has-type]
266+
self.index_col,
269267
self.index_names,
270268
self.dtype,
271269
)
@@ -294,8 +292,9 @@ def _exclude_implicit_index(self, alldata):
294292

295293
offset = 0
296294
if self._implicit_index:
297-
# error: Cannot determine type of 'index_col'
298-
offset = len(self.index_col) # type: ignore[has-type]
295+
# error: Argument 1 to "len" has incompatible type
296+
# "Union[int, Sequence[int], None]"; expected "Sized"
297+
offset = len(self.index_col) # type: ignore[arg-type]
299298

300299
len_alldata = len(alldata)
301300
return {
@@ -444,9 +443,12 @@ def _infer_columns(self):
444443
# line for the rest of the parsing code
445444
if hr == header[-1]:
446445
lc = len(this_columns)
447-
# error: Cannot determine type of 'index_col'
448-
sic = self.index_col # type: ignore[has-type]
449-
ic = len(sic) if sic is not None else 0
446+
sic = self.index_col
447+
# error: Argument 1 to "len" has incompatible type
448+
# "Union[int, Sequence[int]]"; expected "Sized"
449+
ic = (
450+
len(sic) if sic is not None else 0 # type: ignore[arg-type]
451+
)
450452
unnamed_count = len(this_unnamed_cols)
451453

452454
# if wrong number of blanks or no index, not our format
@@ -881,8 +883,7 @@ def _get_index_name(self, columns):
881883
if line is not None:
882884
# leave it 0, #2442
883885
# Case 1
884-
# error: Cannot determine type of 'index_col'
885-
index_col = self.index_col # type: ignore[has-type]
886+
index_col = self.index_col
886887
if index_col is not False:
887888
implicit_first_cols = len(line) - self.num_original_columns
888889

@@ -921,20 +922,16 @@ def _rows_to_cols(self, content):
921922
col_len = self.num_original_columns
922923

923924
if self._implicit_index:
924-
col_len += len(self.index_col)
925+
# error: Argument 1 to "len" has incompatible type
926+
# "Union[int, Sequence[int]]"; expected "Sized"
927+
col_len += len(self.index_col) # type: ignore[arg-type]
925928

926929
max_len = max(len(row) for row in content)
927930

928931
# Check that there are no rows with too many
929932
# elements in their row (rows with too few
930933
# elements are padded with NaN).
931-
# error: Non-overlapping identity check (left operand type: "List[int]",
932-
# right operand type: "Literal[False]")
933-
if (
934-
max_len > col_len
935-
and self.index_col is not False # type: ignore[comparison-overlap]
936-
and self.usecols is None
937-
):
934+
if max_len > col_len and self.index_col is not False and self.usecols is None:
938935

939936
footers = self.skipfooter if self.skipfooter else 0
940937
bad_lines = []
@@ -983,13 +980,13 @@ def _rows_to_cols(self, content):
983980
col_indices = self._col_indices
984981

985982
if self._implicit_index:
983+
# error: Argument 1 to "len" has incompatible type
984+
# "Union[int, Sequence[int]]"; expected "Sized"
985+
lic = len(self.index_col) # type: ignore[arg-type]
986986
zipped_content = [
987987
a
988988
for i, a in enumerate(zipped_content)
989-
if (
990-
i < len(self.index_col)
991-
or i - len(self.index_col) in col_indices
992-
)
989+
if (i < lic or i - lic in col_indices)
993990
]
994991
else:
995992
zipped_content = [

0 commit comments

Comments
 (0)