Skip to content

Type read_fwf #44678

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 22 additions & 8 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
import re
import sys
from typing import (
IO,
DefaultDict,
Hashable,
Iterator,
Literal,
Mapping,
Sequence,
cast,
Expand Down Expand Up @@ -1135,9 +1137,17 @@ class FixedWidthReader(abc.Iterator):
A reader of fixed-width lines.
"""

def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=100):
def __init__(
self,
f: IO[str],
colspecs: list[tuple[int, int]] | Literal["infer"],
delimiter: str | None,
comment: str | None,
skiprows: set[int] | None = None,
infer_nrows: int = 100,
) -> None:
self.f = f
self.buffer = None
self.buffer: Iterator | None = None
self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t "
self.comment = comment
if colspecs == "infer":
Expand Down Expand Up @@ -1165,7 +1175,7 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=1
"2 element tuple or list of integers"
)

def get_rows(self, infer_nrows, skiprows=None):
def get_rows(self, infer_nrows: int, skiprows: set[int] | None = None) -> list[str]:
"""
Read rows from self.f, skipping as specified.

Expand Down Expand Up @@ -1203,7 +1213,9 @@ def get_rows(self, infer_nrows, skiprows=None):
self.buffer = iter(buffer_rows)
return detect_rows

def detect_colspecs(self, infer_nrows=100, skiprows=None):
def detect_colspecs(
self, infer_nrows: int = 100, skiprows: set[int] | None = None
) -> list[tuple[int, int]]:
# Regex escape the delimiters
delimiters = "".join([fr"\{x}" for x in self.delimiter])
pattern = re.compile(f"([^{delimiters}]+)")
Expand All @@ -1223,7 +1235,7 @@ def detect_colspecs(self, infer_nrows=100, skiprows=None):
edge_pairs = list(zip(edges[::2], edges[1::2]))
return edge_pairs

def __next__(self):
def __next__(self) -> list[str]:
if self.buffer is not None:
try:
line = next(self.buffer)
Expand All @@ -1242,13 +1254,15 @@ class FixedWidthFieldParser(PythonParser):
See PythonParser for details.
"""

def __init__(self, f, **kwds):
def __init__(
self, f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], **kwds
) -> None:
# Support iterators, convert to a list.
self.colspecs = kwds.pop("colspecs")
self.infer_nrows = kwds.pop("infer_nrows")
PythonParser.__init__(self, f, **kwds)

def _make_reader(self, f):
def _make_reader(self, f: IO[str]) -> None:
self.data = FixedWidthReader(
f,
self.colspecs,
Expand All @@ -1258,7 +1272,7 @@ def _make_reader(self, f):
self.infer_nrows,
)

def _remove_empty_lines(self, lines) -> list:
def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]:
"""
Returns the list of lines without the empty ones. With fixed-width
fields, empty lines become arrays of empty strings.
Expand Down
13 changes: 8 additions & 5 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,11 +760,11 @@ def read_table(

def read_fwf(
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str],
colspecs="infer",
widths=None,
infer_nrows=100,
colspecs: list[tuple[int, int]] | str | None = "infer",
widths: list[int] | None = None,
infer_nrows: int = 100,
**kwds,
):
) -> DataFrame | TextFileReader:
r"""
Read a table of fixed-width formatted lines into DataFrame.

Expand Down Expand Up @@ -799,7 +799,7 @@ def read_fwf(

Returns
-------
DataFrame or TextParser
DataFrame or TextFileReader
A comma-separated values (csv) file is returned as a two-dimensional
data structure with labeled axes.

Expand All @@ -825,6 +825,9 @@ def read_fwf(
colspecs.append((col, col + w))
col += w

# for mypy
assert colspecs is not None

# GH#40830
# Ensure length of `colspecs` matches length of `names`
names = kwds.get("names")
Expand Down