4
4
abc ,
5
5
defaultdict ,
6
6
)
7
- from collections .abc import (
8
- Hashable ,
9
- Iterator ,
10
- Mapping ,
11
- Sequence ,
12
- )
13
7
import csv
14
8
from io import StringIO
15
9
import re
50
44
)
51
45
52
46
if TYPE_CHECKING :
47
+ from collections .abc import (
48
+ Hashable ,
49
+ Iterator ,
50
+ Mapping ,
51
+ Sequence ,
52
+ )
53
+
53
54
from pandas ._typing import (
54
55
ArrayLike ,
55
56
ReadCsvBuffer ,
56
57
Scalar ,
58
+ T ,
57
59
)
58
60
59
61
from pandas import (
60
62
Index ,
61
63
MultiIndex ,
64
+ Series ,
62
65
)
63
66
64
67
# BOM character (byte order mark)
@@ -77,7 +80,7 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds) -> None:
77
80
"""
78
81
super ().__init__ (kwds )
79
82
80
- self .data : Iterator [str ] | None = None
83
+ self .data : Iterator [list [ str ]] | list [ list [ Scalar ]] = []
81
84
self .buf : list = []
82
85
self .pos = 0
83
86
self .line_pos = 0
@@ -116,10 +119,11 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds) -> None:
116
119
117
120
# Set self.data to something that can read lines.
118
121
if isinstance (f , list ):
119
- # read_excel: f is a list
120
- self .data = cast ( Iterator [ str ], f )
122
+ # read_excel: f is a nested list, can contain non-str
123
+ self .data = f
121
124
else :
122
125
assert hasattr (f , "readline" )
126
+ # yields list of str
123
127
self .data = self ._make_reader (f )
124
128
125
129
# Get columns in two steps: infer from data, then
@@ -179,7 +183,7 @@ def num(self) -> re.Pattern:
179
183
)
180
184
return re .compile (regex )
181
185
182
- def _make_reader (self , f : IO [str ] | ReadCsvBuffer [str ]):
186
+ def _make_reader (self , f : IO [str ] | ReadCsvBuffer [str ]) -> Iterator [ list [ str ]] :
183
187
sep = self .delimiter
184
188
185
189
if sep is None or len (sep ) == 1 :
@@ -246,7 +250,9 @@ def _read():
246
250
def read (
247
251
self , rows : int | None = None
248
252
) -> tuple [
249
- Index | None , Sequence [Hashable ] | MultiIndex , Mapping [Hashable , ArrayLike ]
253
+ Index | None ,
254
+ Sequence [Hashable ] | MultiIndex ,
255
+ Mapping [Hashable , ArrayLike | Series ],
250
256
]:
251
257
try :
252
258
content = self ._get_lines (rows )
@@ -326,7 +332,9 @@ def _exclude_implicit_index(
326
332
def get_chunk (
327
333
self , size : int | None = None
328
334
) -> tuple [
329
- Index | None , Sequence [Hashable ] | MultiIndex , Mapping [Hashable , ArrayLike ]
335
+ Index | None ,
336
+ Sequence [Hashable ] | MultiIndex ,
337
+ Mapping [Hashable , ArrayLike | Series ],
330
338
]:
331
339
if size is None :
332
340
# error: "PythonParser" has no attribute "chunksize"
@@ -689,7 +697,7 @@ def _check_for_bom(self, first_row: list[Scalar]) -> list[Scalar]:
689
697
new_row_list : list [Scalar ] = [new_row ]
690
698
return new_row_list + first_row [1 :]
691
699
692
- def _is_line_empty (self , line : list [Scalar ]) -> bool :
700
+ def _is_line_empty (self , line : Sequence [Scalar ]) -> bool :
693
701
"""
694
702
Check if a line is empty or not.
695
703
@@ -730,8 +738,6 @@ def _next_line(self) -> list[Scalar]:
730
738
else :
731
739
while self .skipfunc (self .pos ):
732
740
self .pos += 1
733
- # assert for mypy, data is Iterator[str] or None, would error in next
734
- assert self .data is not None
735
741
next (self .data )
736
742
737
743
while True :
@@ -800,12 +806,10 @@ def _next_iter_line(self, row_num: int) -> list[Scalar] | None:
800
806
The row number of the line being parsed.
801
807
"""
802
808
try :
803
- # assert for mypy, data is Iterator[str] or None, would error in next
804
- assert self .data is not None
809
+ assert not isinstance (self .data , list )
805
810
line = next (self .data )
806
- # for mypy
807
- assert isinstance (line , list )
808
- return line
811
+ # lie about list[str] vs list[Scalar] to minimize ignores
812
+ return line # type: ignore[return-value]
809
813
except csv .Error as e :
810
814
if self .on_bad_lines in (
811
815
self .BadLineHandleMethod .ERROR ,
@@ -855,7 +859,7 @@ def _check_comments(self, lines: list[list[Scalar]]) -> list[list[Scalar]]:
855
859
ret .append (rl )
856
860
return ret
857
861
858
- def _remove_empty_lines (self , lines : list [list [Scalar ]]) -> list [list [Scalar ]]:
862
+ def _remove_empty_lines (self , lines : list [list [T ]]) -> list [list [T ]]:
859
863
"""
860
864
Iterate through the lines and remove any that are
861
865
either empty or contain only one whitespace value
@@ -1121,9 +1125,6 @@ def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]:
1121
1125
row_ct = 0
1122
1126
offset = self .pos if self .pos is not None else 0
1123
1127
while row_ct < rows :
1124
- # assert for mypy, data is Iterator[str] or None, would
1125
- # error in next
1126
- assert self .data is not None
1127
1128
new_row = next (self .data )
1128
1129
if not self .skipfunc (offset + row_index ):
1129
1130
row_ct += 1
@@ -1338,7 +1339,7 @@ def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> FixedWidthReader:
1338
1339
self .infer_nrows ,
1339
1340
)
1340
1341
1341
- def _remove_empty_lines (self , lines : list [list [Scalar ]]) -> list [list [Scalar ]]:
1342
+ def _remove_empty_lines (self , lines : list [list [T ]]) -> list [list [T ]]:
1342
1343
"""
1343
1344
Returns the list of lines without the empty ones. With fixed-width
1344
1345
fields, empty lines become arrays of empty strings.
0 commit comments