Skip to content

Commit 74c2384

Browse files
committed
Address latest review comments from @WillAyd
Mostly involved reformatting test_html.py
1 parent d2f0b83 commit 74c2384

File tree

2 files changed

+322
-292
lines changed

2 files changed

+322
-292
lines changed

pandas/io/html.py

+4 -16
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,8 @@ def _attr_getter(self, obj, attr):
236236
str or unicode
237237
The attribute value.
238238
"""
239-
raise com.AbstractMethodError(self)
239+
# Both lxml and BeautifulSoup have the same implementation:
240+
return obj.get(attr)
240241

241242
def _text_getter(self, obj):
242243
"""
@@ -409,19 +410,12 @@ def row_is_all_th(row):
409410
self._parse_td(row))
410411

411412
if not header_rows:
412-
# The table has no <thead>. Move the top all-<th> rows from the
413-
# <tbody> to the <thead>. (This is a common case because many
413+
# The table has no <thead>. Move the top all-<th> rows from
414+
# body_rows to header_rows. (This is a common case because many
414415
# tables in the wild have no <thead> or <tfoot>.)
415416
while body_rows and row_is_all_th(body_rows[0]):
416417
header_rows.append(body_rows.pop(0))
417418

418-
if not footer_rows:
419-
# The table has no <tfoot>. Treat last all-<th> rows as footers.
420-
while body_rows and row_is_all_th(body_rows[-1]):
421-
# .insert(), not .append(): we're moving "bottom of <tbody>" to
422-
# "top of <tfoot>"
423-
footer_rows.insert(0, body_rows.pop())
424-
425419
header = self._expand_colspan_rowspan(header_rows)
426420
body = self._expand_colspan_rowspan(body_rows)
427421
footer = self._expand_colspan_rowspan(footer_rows)
@@ -578,9 +572,6 @@ def _parse_tables(self, doc, match, attrs):
578572
.format(patt=match.pattern))
579573
return result
580574

581-
def _attr_getter(self, obj, attr):
582-
return obj.get(attr)
583-
584575
def _text_getter(self, obj):
585576
return obj.text
586577

@@ -662,9 +653,6 @@ class _LxmlFrameParser(_HtmlFrameParser):
662653
def __init__(self, *args, **kwargs):
663654
super(_LxmlFrameParser, self).__init__(*args, **kwargs)
664655

665-
def _attr_getter(self, obj, attr):
666-
return obj.get(attr)
667-
668656
def _text_getter(self, obj):
669657
return obj.text_content()
670658

0 commit comments

Comments
 (0)