Skip to content

Commit 7bd1b24

Browse files
committed
Merge branch 'add-footer-to-read-html' of https://github.com/mjsu/pandas into mjsu-add-footer-to-read-html
Conflicts: doc/source/whatsnew/v0.15.2.txt
2 parents 6cd7490 + 7587bf1 commit 7bd1b24

File tree

3 files changed

+38
-2
lines changed

3 files changed

+38
-2
lines changed

doc/source/whatsnew/v0.15.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ Enhancements
8282
- Added context manager to ``HDFStore`` for automatic closing (:issue:`8791`).
8383
- ``to_datetime`` gains an ``exact`` keyword to allow for a format to not require an exact match for a provided format string (if it is ``False``). ``exact`` defaults to ``True`` (meaning that exact matching is still the default) (:issue:`8904`)
8484
- Added ``axvlines`` boolean option to the ``parallel_coordinates`` plot function; it determines whether vertical lines will be printed (default is ``True``)
85+
- Added ability to read table footers to ``read_html`` (:issue:`8552`)
8586

8687
.. _whatsnew_0152.performance:
8788

pandas/io/html.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ def _parse_raw_thead(self, table):
577577
table.xpath(expr)]
578578

579579
def _parse_raw_tfoot(self, table):
    """Collect the text of the footer cells of ``table``.

    Both ``<th>`` and ``<td>`` elements found under a ``<tfoot>`` are
    matched.

    Parameters
    ----------
    table : node-like
        The table element to search; it must provide an ``xpath`` method.

    Returns
    -------
    list
        One entry per matched cell: the cell's ``text_content()`` passed
        through ``_remove_whitespace``.
    """
    # Every branch of the union must start with '.' so that the search
    # stays relative to ``table``.  A bare '//tfoot//td' is an absolute
    # location path: it is evaluated from the document root and would
    # return footer cells belonging to every table in the document.
    expr = './/tfoot//th|.//tfoot//td'
    return [_remove_whitespace(x.text_content()) for x in
            table.xpath(expr)]
583583

@@ -594,14 +594,17 @@ def _expand_elements(body):
594594

595595
def _data_to_frame(data, header, index_col, skiprows, infer_types,
596596
parse_dates, tupleize_cols, thousands):
597-
head, body, _ = data # _ is footer which is rarely used: ignore for now
597+
head, body, foot = data
598598

599599
if head:
600600
body = [head] + body
601601

602602
if header is None: # special case when a table has <th> elements
603603
header = 0
604604

605+
if foot:
606+
body += [foot]
607+
605608
# fill out elements of body that are "ragged"
606609
_expand_elements(body)
607610

pandas/io/tests/test_html.py

+32
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,38 @@ def test_empty_tables(self):
426426
res1 = self.read_html(StringIO(data1))
427427
res2 = self.read_html(StringIO(data2))
428428
assert_framelist_equal(res1, res2)
429+
430+
def test_tfoot_read(self):
431+
"""
432+
Make sure that read_html reads tfoot, containing td or th.
433+
Ignores empty tfoot
434+
"""
435+
data_template = '''<table>
436+
<thead>
437+
<tr>
438+
<th>A</th>
439+
<th>B</th>
440+
</tr>
441+
</thead>
442+
<tbody>
443+
<tr>
444+
<td>bodyA</td>
445+
<td>bodyB</td>
446+
</tr>
447+
</tbody>
448+
<tfoot>
449+
{footer}
450+
</tfoot>
451+
</table>'''
452+
453+
data1 = data_template.format(footer = "")
454+
data2 = data_template.format(footer ="<tr><td>footA</td><th>footB</th></tr>")
455+
456+
d1 = {'A': ['bodyA'], 'B': ['bodyB']}
457+
d2 = {'A': ['bodyA', 'footA'], 'B': ['bodyB', 'footB']}
458+
459+
tm.assert_frame_equal(self.read_html(data1)[0], DataFrame(d1))
460+
tm.assert_frame_equal(self.read_html(data2)[0], DataFrame(d2))
429461

430462
def test_countries_municipalities(self):
431463
# GH5048

0 commit comments

Comments
 (0)