BUG: skip_blank_lines ignored by read_fwf (#37803)

ma3da · web-flow · commit 3ca6330bf06f · 2020-11-14T12:13:00.000-08:00
Closes #37758
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -623,6 +623,7 @@ I/O
 - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`)
 - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
 - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
+- Bug in :func:`read_fwf` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -3750,6 +3750,19 @@ def _make_reader(self, f):
             self.infer_nrows,
         )
 
+    def _remove_empty_lines(self, lines) -> List:
+        """
+        Return ``lines`` without the rows whose cells are all blank strings.
+
+        A blank fixed-width line parses to a list of empty strings; a row is kept
+        if any cell is non-str or non-blank. See PythonParser._remove_empty_lines.
+        """
+        return [
+            line
+            for line in lines
+            if any(not isinstance(e, str) or e.strip() for e in line)
+        ]
+
 
 def _refine_defaults_read(
     dialect: Union[str, csv.Dialect],
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
@@ -340,6 +340,51 @@ def test_fwf_comment(comment):
     tm.assert_almost_equal(result, expected)
 
 
+def test_fwf_skip_blank_lines():  # GH 37758
+    data = """
+
+A         B            C            D
+
+201158    360.242940   149.910199   11950.7
+201159    444.953632   166.985655   11788.4
+
+
+201162    502.953953   173.237159   12468.3
+
+"""
+    result = read_fwf(StringIO(data), skip_blank_lines=True)
+    expected = DataFrame(  # only the three data rows; blank lines add no rows
+        [
+            [201158, 360.242940, 149.910199, 11950.7],
+            [201159, 444.953632, 166.985655, 11788.4],
+            [201162, 502.953953, 173.237159, 12468.3],
+        ],
+        columns=["A", "B", "C", "D"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+    data = """\
+A         B            C            D
+201158    360.242940   149.910199   11950.7
+201159    444.953632   166.985655   11788.4
+
+
+201162    502.953953   173.237159   12468.3
+"""
+    result = read_fwf(StringIO(data), skip_blank_lines=False)
+    expected = DataFrame(  # the two blank lines are expected as all-NaN rows
+        [
+            [201158, 360.242940, 149.910199, 11950.7],
+            [201159, 444.953632, 166.985655, 11788.4],
+            [np.nan, np.nan, np.nan, np.nan],  # first blank line
+            [np.nan, np.nan, np.nan, np.nan],  # second blank line
+            [201162, 502.953953, 173.237159, 12468.3],
+        ],
+        columns=["A", "B", "C", "D"],
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("thousands", [",", "#", "~"])
 def test_fwf_thousands(thousands):
     data = """\