Skip to content

Commit 9138b1d

Browse files
authored
Typ parts of c parser (#44677)
1 parent 9098d14 commit 9138b1d

File tree

3 files changed: +61 -14 lines changed

pandas/io/parsers/base_parser.py

+30 -4
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
DefaultDict,
1313
Hashable,
1414
Iterable,
15+
List,
1516
Mapping,
1617
Sequence,
18+
Tuple,
1719
cast,
1820
final,
1921
overload,
@@ -441,10 +443,15 @@ def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
441443
return names
442444

443445
@final
444-
def _maybe_make_multi_index_columns(self, columns, col_names=None):
446+
def _maybe_make_multi_index_columns(
447+
self,
448+
columns: Sequence[Hashable],
449+
col_names: Sequence[Hashable] | None = None,
450+
) -> Sequence[Hashable] | MultiIndex:
445451
# possibly create a column mi here
446452
if _is_potential_multi_index(columns):
447-
columns = MultiIndex.from_tuples(columns, names=col_names)
453+
list_columns = cast(List[Tuple], columns)
454+
return MultiIndex.from_tuples(list_columns, names=col_names)
448455
return columns
449456

450457
@final
@@ -923,7 +930,25 @@ def _check_data_length(
923930
stacklevel=find_stack_level(),
924931
)
925932

926-
def _evaluate_usecols(self, usecols, names):
933+
@overload
934+
def _evaluate_usecols(
935+
self,
936+
usecols: set[int] | Callable[[Hashable], object],
937+
names: Sequence[Hashable],
938+
) -> set[int]:
939+
...
940+
941+
@overload
942+
def _evaluate_usecols(
943+
self, usecols: set[str], names: Sequence[Hashable]
944+
) -> set[str]:
945+
...
946+
947+
def _evaluate_usecols(
948+
self,
949+
usecols: Callable[[Hashable], object] | set[str] | set[int],
950+
names: Sequence[Hashable],
951+
) -> set[str] | set[int]:
927952
"""
928953
Check whether or not the 'usecols' parameter
929954
is a callable. If so, enumerates the 'names'
@@ -1289,7 +1314,8 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na):
12891314

12901315

12911316
def _is_potential_multi_index(
1292-
columns, index_col: bool | Sequence[int] | None = None
1317+
columns: Sequence[Hashable] | MultiIndex,
1318+
index_col: bool | Sequence[int] | None = None,
12931319
) -> bool:
12941320
"""
12951321
Check whether or not the `columns` parameter

pandas/io/parsers/c_parser_wrapper.py

+28 -8
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
from __future__ import annotations
22

3+
from typing import (
4+
Hashable,
5+
Mapping,
6+
Sequence,
7+
)
38
import warnings
49

510
import numpy as np
611

712
import pandas._libs.parsers as parsers
813
from pandas._typing import (
914
ArrayLike,
15+
DtypeArg,
16+
DtypeObj,
1017
FilePath,
1118
ReadCsvBuffer,
1219
)
@@ -20,6 +27,10 @@
2027
from pandas.core.dtypes.concat import union_categoricals
2128
from pandas.core.dtypes.dtypes import ExtensionDtype
2229

30+
from pandas import (
31+
Index,
32+
MultiIndex,
33+
)
2334
from pandas.core.indexes.api import ensure_index_from_sequences
2435

2536
from pandas.io.parsers.base_parser import (
@@ -193,7 +204,7 @@ def close(self) -> None:
193204
except ValueError:
194205
pass
195206

196-
def _set_noconvert_columns(self):
207+
def _set_noconvert_columns(self) -> None:
197208
"""
198209
Set the columns that should not undergo dtype conversions.
199210
@@ -214,7 +225,14 @@ def _set_noconvert_columns(self):
214225
for col in noconvert_columns:
215226
self._reader.set_noconvert(col)
216227

217-
def read(self, nrows=None):
228+
def read(
229+
self,
230+
nrows: int | None = None,
231+
) -> tuple[
232+
Index | MultiIndex | None,
233+
Sequence[Hashable] | MultiIndex,
234+
Mapping[Hashable, ArrayLike],
235+
]:
218236
try:
219237
if self.low_memory:
220238
chunks = self._reader.read_low_memory(nrows)
@@ -306,11 +324,11 @@ def read(self, nrows=None):
306324
index, names = self._make_index(date_data, alldata, names)
307325

308326
# maybe create a mi on the columns
309-
names = self._maybe_make_multi_index_columns(names, self.col_names)
327+
conv_names = self._maybe_make_multi_index_columns(names, self.col_names)
310328

311-
return index, names, date_data
329+
return index, conv_names, date_data
312330

313-
def _filter_usecols(self, names):
331+
def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
314332
# hackish
315333
usecols = self._evaluate_usecols(self.usecols, names)
316334
if usecols is not None and len(names) != len(usecols):
@@ -395,13 +413,15 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
395413
return result
396414

397415

398-
def ensure_dtype_objs(dtype):
416+
def ensure_dtype_objs(
417+
dtype: DtypeArg | dict[Hashable, DtypeArg] | None
418+
) -> DtypeObj | dict[Hashable, DtypeObj] | None:
399419
"""
400420
Ensure we have either None, a dtype object, or a dictionary mapping to
401421
dtype objects.
402422
"""
403423
if isinstance(dtype, dict):
404-
dtype = {k: pandas_dtype(dtype[k]) for k in dtype}
424+
return {k: pandas_dtype(dtype[k]) for k in dtype}
405425
elif dtype is not None:
406-
dtype = pandas_dtype(dtype)
426+
return pandas_dtype(dtype)
407427
return dtype

pandas/io/parsers/python_parser.py

+3 -2
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,8 @@ def read(self, rows: int | None = None):
270270
self.index_names,
271271
self.dtype,
272272
)
273-
columns = self._maybe_make_multi_index_columns(columns, self.col_names)
274-
return index, columns, col_dict
273+
conv_columns = self._maybe_make_multi_index_columns(columns, self.col_names)
274+
return index, conv_columns, col_dict
275275

276276
# handle new style for names in index
277277
count_empty_content_vals = count_empty_vals(content[0])
@@ -560,6 +560,7 @@ def _handle_usecols(
560560
561561
usecols_key is used if there are string usecols.
562562
"""
563+
col_indices: set[int] | list[int]
563564
if self.usecols is not None:
564565
if callable(self.usecols):
565566
col_indices = self._evaluate_usecols(self.usecols, usecols_key)

0 commit comments

Comments
 (0)