diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 27d0944572024..b493c4f12fb31 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -9,9 +9,11 @@ import re import sys from typing import ( + IO, DefaultDict, Hashable, Iterator, + Literal, Mapping, Sequence, cast, @@ -1135,9 +1137,17 @@ class FixedWidthReader(abc.Iterator): A reader of fixed-width lines. """ - def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=100): + def __init__( + self, + f: IO[str], + colspecs: list[tuple[int, int]] | Literal["infer"], + delimiter: str | None, + comment: str | None, + skiprows: set[int] | None = None, + infer_nrows: int = 100, + ) -> None: self.f = f - self.buffer = None + self.buffer: Iterator | None = None self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t " self.comment = comment if colspecs == "infer": @@ -1165,7 +1175,7 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=1 "2 element tuple or list of integers" ) - def get_rows(self, infer_nrows, skiprows=None): + def get_rows(self, infer_nrows: int, skiprows: set[int] | None = None) -> list[str]: """ Read rows from self.f, skipping as specified. @@ -1203,7 +1213,9 @@ def get_rows(self, infer_nrows, skiprows=None): self.buffer = iter(buffer_rows) return detect_rows - def detect_colspecs(self, infer_nrows=100, skiprows=None): + def detect_colspecs( + self, infer_nrows: int = 100, skiprows: set[int] | None = None + ) -> list[tuple[int, int]]: # Regex escape the delimiters delimiters = "".join([fr"\{x}" for x in self.delimiter]) pattern = re.compile(f"([^{delimiters}]+)") @@ -1223,7 +1235,7 @@ def detect_colspecs(self, infer_nrows=100, skiprows=None): edge_pairs = list(zip(edges[::2], edges[1::2])) return edge_pairs - def __next__(self): + def __next__(self) -> list[str]: if self.buffer is not None: try: line = next(self.buffer) @@ -1242,13 +1254,15 @@ class FixedWidthFieldParser(PythonParser): See PythonParser for details. """ - def __init__(self, f, **kwds): + def __init__( + self, f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], **kwds + ) -> None: # Support iterators, convert to a list. self.colspecs = kwds.pop("colspecs") self.infer_nrows = kwds.pop("infer_nrows") PythonParser.__init__(self, f, **kwds) - def _make_reader(self, f): + def _make_reader(self, f: IO[str]) -> None: self.data = FixedWidthReader( f, self.colspecs, @@ -1258,7 +1272,7 @@ def _make_reader(self, f): self.infer_nrows, ) - def _remove_empty_lines(self, lines) -> list: + def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]: """ Returns the list of lines without the empty ones. With fixed-width fields, empty lines become arrays of empty strings. diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 63ab10e1e5362..6a2a7b791c048 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -760,11 +760,11 @@ def read_table( def read_fwf( filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], - colspecs="infer", - widths=None, - infer_nrows=100, + colspecs: list[tuple[int, int]] | str | None = "infer", + widths: list[int] | None = None, + infer_nrows: int = 100, **kwds, -): +) -> DataFrame | TextFileReader: r""" Read a table of fixed-width formatted lines into DataFrame. @@ -799,7 +799,7 @@ def read_fwf( Returns ------- - DataFrame or TextParser + DataFrame or TextFileReader A comma-separated values (csv) file is returned as two-dimensional data structure with labeled axes. @@ -825,6 +825,9 @@ def read_fwf( colspecs.append((col, col + w)) col += w + # for mypy + assert colspecs is not None + # GH#40830 # Ensure length of `colspecs` matches length of `names` names = kwds.get("names")