Skip to content

Commit 59c175f

Browse files
mdmueller authored and jreback committed
Made read_html ignore empty tables, added test
1 parent f07710d commit 59c175f

File tree

3 files changed

+35
-3
lines changed

3 files changed

+35
-3
lines changed

doc/source/v0.15.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -665,6 +665,7 @@ Bug Fixes
665665
- Bug in ``read_csv`` where line comments were not handled correctly given
666666
a custom line terminator or ``delim_whitespace=True`` (:issue:`8122`).
667667

668+
- Bug in ``read_html`` where empty tables caused a ``StopIteration`` (:issue:`7575`).
668669

669670
- Bug in accessing groups from a ``GroupBy`` when the original grouper
670671
was a tuple (:issue:`8121`).

pandas/io/html.py

+8-3
Original file line number | Diff line number | Diff line change
@@ -711,9 +711,14 @@ def _parse(flavor, io, match, header, index_col, skiprows, infer_types,
711711
else:
712712
raise_with_traceback(retained)
713713

714-
return [_data_to_frame(table, header, index_col, skiprows, infer_types,
715-
parse_dates, tupleize_cols, thousands)
716-
for table in tables]
714+
ret = []
715+
for table in tables:
716+
try:
717+
ret.append(_data_to_frame(table, header, index_col, skiprows,
718+
infer_types, parse_dates, tupleize_cols, thousands))
719+
except StopIteration: # empty table
720+
continue
721+
return ret
717722

718723

719724
def read_html(io, match='.+', flavor=None, header=None, index_col=None,

pandas/io/tests/test_html.py

+26
Original file line number | Diff line number | Diff line change
@@ -401,6 +401,32 @@ def test_thousands_macau_index_col(self):
401401

402402
self.assertFalse(any(s.isnull().any() for _, s in df.iteritems()))
403403

404+
def test_empty_tables(self):
405+
"""
406+
Make sure that read_html ignores empty tables.
407+
"""
408+
data1 = '''<table>
409+
<thead>
410+
<tr>
411+
<th>A</th>
412+
<th>B</th>
413+
</tr>
414+
</thead>
415+
<tbody>
416+
<tr>
417+
<td>1</td>
418+
<td>2</td>
419+
</tr>
420+
</tbody>
421+
</table>'''
422+
data2 = data1 + '''<table>
423+
<tbody>
424+
</tbody>
425+
</table>'''
426+
res1 = self.read_html(StringIO(data1))
427+
res2 = self.read_html(StringIO(data2))
428+
assert_framelist_equal(res1, res2)
429+
404430
def test_countries_municipalities(self):
405431
# GH5048
406432
data1 = StringIO('''<table>

0 commit comments

Comments
 (0)