BUG: fix for read_html with bs4 failing on table with header and one column

kotrfa · jreback · commit bec52727c991 · 2016-04-25T17:56:45.000-04:00
closes #12975 closes #9178
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -417,6 +417,11 @@ Bug Fixes
 - Bug in ``concat`` doesn't handle empty ``Series`` properly (:issue:`11082`)
 
 
+
+- Bug in ``fill_value`` is ignored if the argument to a binary operator is a constant (:issue:`12723`)
+
+- Bug in ``pd.read_html`` when using bs4 flavor and parsing table with a header and only one column (:issue:`9178`)
+
 - Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
 - Bug in ``pivot_table`` when ``dropna=False`` where table index/column names disappear (:issue:`12133`)
 - Bug in ``crosstab`` when ``margins=True`` and ``dropna=False`` which raised (:issue:`12642`)
@@ -425,7 +430,4 @@ Bug Fixes
 - Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
 - Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`)
 - ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
-- ``pd.read_excel()`` now accepts column names associated with keyword argument ``names``(:issue:`12870`)
-
-
-- Bug in ``fill_value`` is ignored if the argument to a binary operator is a constant (:issue:`12723`)
+- ``pd.read_excel()`` now accepts column names associated with keyword argument ``names``(:issue:`12870`)
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -356,14 +356,16 @@ def _parse_raw_thead(self, table):
         res = []
         if thead:
             res = lmap(self._text_getter, self._parse_th(thead[0]))
-        return np.array(res).squeeze() if res and len(res) == 1 else res
+        return np.atleast_1d(
+            np.array(res).squeeze()) if res and len(res) == 1 else res
 
     def _parse_raw_tfoot(self, table):
         tfoot = self._parse_tfoot(table)
         res = []
         if tfoot:
             res = lmap(self._text_getter, self._parse_td(tfoot[0]))
-        return np.array(res).squeeze() if res and len(res) == 1 else res
+        return np.atleast_1d(
+            np.array(res).squeeze()) if res and len(res) == 1 else res
 
     def _parse_raw_tbody(self, table):
         tbody = self._parse_tbody(table)
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
@@ -416,6 +416,31 @@ def test_empty_tables(self):
         res2 = self.read_html(StringIO(data2))
         assert_framelist_equal(res1, res2)
 
+    def test_header_and_one_column(self):
+        """
+        Don't fail with bs4 when there is a header and only one column
+        as described in issue #9178
+        """
+        data = StringIO('''<html>
+            <body>
+             <table>
+                <thead>
+                    <tr>
+                        <th>Header</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr>
+                        <td>first</td>
+                    </tr>
+                </tbody>
+            </table>
+            </body>
+        </html>''')
+        expected = DataFrame(data={'Header': 'first'}, index=[0])
+        result = self.read_html(data)[0]
+        tm.assert_frame_equal(result, expected)
+
     def test_tfoot_read(self):
         """
         Make sure that read_html reads tfoot, containing td or th.