Skip to content

Commit 42eaf6e

Browse files
jbrockmendel authored and TLouf committed
TYP: __future__ annotations in parsers (pandas-dev#41620)
1 parent 6cec46c commit 42eaf6e

File tree

2 files changed

+29
-35
lines changed

2 files changed

+29
-35
lines changed

pandas/io/parsers/base_parser.py

+15-19
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from collections import defaultdict
24
import csv
35
import datetime
@@ -6,14 +8,8 @@
68
Any,
79
Callable,
810
DefaultDict,
9-
Dict,
1011
Iterable,
11-
List,
12-
Optional,
1312
Sequence,
14-
Set,
15-
Tuple,
16-
Union,
1713
cast,
1814
)
1915
import warnings
@@ -122,12 +118,12 @@ class ParserBase:
122118
def __init__(self, kwds):
123119

124120
self.names = kwds.get("names")
125-
self.orig_names: Optional[List] = None
121+
self.orig_names: list | None = None
126122
self.prefix = kwds.pop("prefix", None)
127123

128124
self.index_col = kwds.get("index_col", None)
129-
self.unnamed_cols: Set = set()
130-
self.index_names: Optional[List] = None
125+
self.unnamed_cols: set = set()
126+
self.index_names: list | None = None
131127
self.col_names = None
132128

133129
self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
@@ -205,9 +201,9 @@ def __init__(self, kwds):
205201

206202
self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"])
207203

208-
self.handles: Optional[IOHandles] = None
204+
self.handles: IOHandles | None = None
209205

210-
def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None:
206+
def _open_handles(self, src: FilePathOrBuffer, kwds: dict[str, Any]) -> None:
211207
"""
212208
Let the readers open IOHandles after they are done with their potential raises.
213209
"""
@@ -221,7 +217,7 @@ def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None:
221217
errors=kwds.get("encoding_errors", "strict"),
222218
)
223219

224-
def _validate_parse_dates_presence(self, columns: List[str]) -> None:
220+
def _validate_parse_dates_presence(self, columns: list[str]) -> None:
225221
"""
226222
Check if parse_dates are in columns.
227223
@@ -371,7 +367,7 @@ def _maybe_dedup_names(self, names):
371367
# would be nice!
372368
if self.mangle_dupe_cols:
373369
names = list(names) # so we can index
374-
counts: DefaultDict[Union[int, str, Tuple], int] = defaultdict(int)
370+
counts: DefaultDict[int | str | tuple, int] = defaultdict(int)
375371
is_potential_mi = _is_potential_multi_index(names, self.index_col)
376372

377373
for i, col in enumerate(names):
@@ -596,8 +592,8 @@ def _convert_to_ndarrays(
596592

597593
@final
598594
def _set_noconvert_dtype_columns(
599-
self, col_indices: List[int], names: List[Union[int, str, Tuple]]
600-
) -> Set[int]:
595+
self, col_indices: list[int], names: list[int | str | tuple]
596+
) -> set[int]:
601597
"""
602598
Set the columns that should not undergo dtype conversions.
603599
@@ -615,7 +611,7 @@ def _set_noconvert_dtype_columns(
615611
-------
616612
A set of integers containing the positions of the columns not to convert.
617613
"""
618-
usecols: Optional[Union[List[int], List[str]]]
614+
usecols: list[int] | list[str] | None
619615
noconvert_columns = set()
620616
if self.usecols_dtype == "integer":
621617
# A set of integers will be converted to a list in
@@ -900,7 +896,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
900896
return [None] * len(index_col), columns, index_col
901897

902898
cp_cols = list(columns)
903-
index_names: List[Optional[Union[int, str]]] = []
899+
index_names: list[str | int | None] = []
904900

905901
# don't mutate
906902
index_col = list(index_col)
@@ -926,7 +922,7 @@ def _clean_index_names(self, columns, index_col, unnamed_cols):
926922
return index_names, columns, index_col
927923

928924
def _get_empty_meta(
929-
self, columns, index_col, index_names, dtype: Optional[DtypeArg] = None
925+
self, columns, index_col, index_names, dtype: DtypeArg | None = None
930926
):
931927
columns = list(columns)
932928

@@ -1150,7 +1146,7 @@ def _get_na_values(col, na_values, na_fvalues, keep_default_na):
11501146

11511147

11521148
def _is_potential_multi_index(
1153-
columns, index_col: Optional[Union[bool, Sequence[int]]] = None
1149+
columns, index_col: bool | Sequence[int] | None = None
11541150
) -> bool:
11551151
"""
11561152
Check whether or not the `columns` parameter

pandas/io/parsers/python_parser.py

+14-16
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from collections import (
24
abc,
35
defaultdict,
@@ -9,10 +11,6 @@
911
from typing import (
1012
DefaultDict,
1113
Iterator,
12-
List,
13-
Optional,
14-
Set,
15-
Tuple,
1614
cast,
1715
)
1816
import warnings
@@ -44,14 +42,14 @@
4442

4543

4644
class PythonParser(ParserBase):
47-
def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
45+
def __init__(self, f: Union[FilePathOrBuffer, list], **kwds):
4846
"""
4947
Workhorse function for processing nested list into DataFrame
5048
"""
5149
ParserBase.__init__(self, kwds)
5250

53-
self.data: Optional[Iterator[str]] = None
54-
self.buf: List = []
51+
self.data: Iterator[str] | None = None
52+
self.buf: list = []
5553
self.pos = 0
5654
self.line_pos = 0
5755

@@ -110,7 +108,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
110108

111109
# Get columns in two steps: infer from data, then
112110
# infer column indices from self.usecols if it is specified.
113-
self._col_indices: Optional[List[int]] = None
111+
self._col_indices: list[int] | None = None
114112
try:
115113
(
116114
self.columns,
@@ -143,7 +141,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
143141
self.columns = self.columns[0]
144142

145143
# get popped off for index
146-
self.orig_names: List[Union[int, str, Tuple]] = list(self.columns)
144+
self.orig_names: list[int | str | tuple] = list(self.columns)
147145

148146
# needs to be cleaned/refactored
149147
# multiple date column thing turning into a real spaghetti factory
@@ -160,7 +158,7 @@ def __init__(self, f: Union[FilePathOrBuffer, List], **kwds):
160158
self._col_indices = list(range(len(self.columns)))
161159

162160
self._validate_parse_dates_presence(self.columns)
163-
no_thousands_columns: Optional[Set[int]] = None
161+
no_thousands_columns: set[int] | None = None
164162
if self.parse_dates:
165163
no_thousands_columns = self._set_noconvert_dtype_columns(
166164
self._col_indices, self.columns
@@ -360,7 +358,7 @@ def _infer_columns(self):
360358
names = self.names
361359
num_original_columns = 0
362360
clear_buffer = True
363-
unnamed_cols: Set[Optional[Union[int, str]]] = set()
361+
unnamed_cols: set[str | int | None] = set()
364362

365363
if self.header is not None:
366364
header = self.header
@@ -374,7 +372,7 @@ def _infer_columns(self):
374372
have_mi_columns = False
375373
header = [header]
376374

377-
columns: List[List[Optional[Union[int, str]]]] = []
375+
columns: list[list[int | str | None]] = []
378376
for level, hr in enumerate(header):
379377
try:
380378
line = self._buffered_line()
@@ -403,7 +401,7 @@ def _infer_columns(self):
403401

404402
line = self.names[:]
405403

406-
this_columns: List[Optional[Union[int, str]]] = []
404+
this_columns: list[int | str | None] = []
407405
this_unnamed_cols = []
408406

409407
for i, c in enumerate(line):
@@ -531,8 +529,8 @@ def _infer_columns(self):
531529

532530
def _handle_usecols(
533531
self,
534-
columns: List[List[Union[Optional[str], Optional[int]]]],
535-
usecols_key: List[Union[Optional[str], Optional[int]]],
532+
columns: list[list[str | int | None]],
533+
usecols_key: list[str | int | None],
536534
num_original_columns: int,
537535
):
538536
"""
@@ -1209,7 +1207,7 @@ def _make_reader(self, f):
12091207
self.infer_nrows,
12101208
)
12111209

1212-
def _remove_empty_lines(self, lines) -> List:
1210+
def _remove_empty_lines(self, lines) -> list:
12131211
"""
12141212
Returns the list of lines without the empty ones. With fixed-width
12151213
fields, empty lines become arrays of empty strings.

0 commit comments

Comments
 (0)