Skip to content

Commit 3ca6330

Browse files
authored
BUG: skip_blank_lines ignored by read_fwf (#37803)
Closes #37758
1 parent 9b3cc20 commit 3ca6330

File tree

3 files changed

+59
-0
lines changed

3 files changed

+59
-0
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -623,6 +623,7 @@ I/O
623623
- Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`)
624624
- Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
625625
- :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
626+
- Bug in :func:`read_fwf` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`)
626627

627628
Plotting
628629
^^^^^^^^

pandas/io/parsers.py

+13
Original file line number | Diff line number | Diff line change
@@ -3750,6 +3750,19 @@ def _make_reader(self, f):
37503750
self.infer_nrows,
37513751
)
37523752

3753+
def _remove_empty_lines(self, lines) -> List:
3754+
"""
3755+
Returns the list of lines without the empty ones. With fixed-width
3756+
fields, empty lines become arrays of empty strings.
3757+
3758+
See PythonParser._remove_empty_lines.
3759+
"""
3760+
return [
3761+
line
3762+
for line in lines
3763+
if any(not isinstance(e, str) or e.strip() for e in line)
3764+
]
3765+
37533766

37543767
def _refine_defaults_read(
37553768
dialect: Union[str, csv.Dialect],

pandas/tests/io/parser/test_read_fwf.py

+45
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,51 @@ def test_fwf_comment(comment):
340340
tm.assert_almost_equal(result, expected)
341341

342342

343+
def test_fwf_skip_blank_lines():
344+
data = """
345+
346+
A B C D
347+
348+
201158 360.242940 149.910199 11950.7
349+
201159 444.953632 166.985655 11788.4
350+
351+
352+
201162 502.953953 173.237159 12468.3
353+
354+
"""
355+
result = read_fwf(StringIO(data), skip_blank_lines=True)
356+
expected = DataFrame(
357+
[
358+
[201158, 360.242940, 149.910199, 11950.7],
359+
[201159, 444.953632, 166.985655, 11788.4],
360+
[201162, 502.953953, 173.237159, 12468.3],
361+
],
362+
columns=["A", "B", "C", "D"],
363+
)
364+
tm.assert_frame_equal(result, expected)
365+
366+
data = """\
367+
A B C D
368+
201158 360.242940 149.910199 11950.7
369+
201159 444.953632 166.985655 11788.4
370+
371+
372+
201162 502.953953 173.237159 12468.3
373+
"""
374+
result = read_fwf(StringIO(data), skip_blank_lines=False)
375+
expected = DataFrame(
376+
[
377+
[201158, 360.242940, 149.910199, 11950.7],
378+
[201159, 444.953632, 166.985655, 11788.4],
379+
[np.nan, np.nan, np.nan, np.nan],
380+
[np.nan, np.nan, np.nan, np.nan],
381+
[201162, 502.953953, 173.237159, 12468.3],
382+
],
383+
columns=["A", "B", "C", "D"],
384+
)
385+
tm.assert_frame_equal(result, expected)
386+
387+
343388
@pytest.mark.parametrize("thousands", [",", "#", "~"])
344389
def test_fwf_thousands(thousands):
345390
data = """\

0 commit comments

Comments
 (0)