@@ -582,7 +582,6 @@ def __init__(self, *args, **kwargs) -> None:
582
582
def _parse_tables (self , doc , match , attrs ):
583
583
element_name = self ._strainer .name
584
584
tables = doc .find_all (element_name , attrs = attrs )
585
-
586
585
if not tables :
587
586
raise ValueError ("No tables found" )
588
587
@@ -592,13 +591,15 @@ def _parse_tables(self, doc, match, attrs):
592
591
593
592
for table in tables :
594
593
if self .displayed_only :
594
+ for elem in table .find_all ("style" ):
595
+ elem .decompose ()
596
+
595
597
for elem in table .find_all (style = re .compile (r"display:\s*none" )):
596
598
elem .decompose ()
597
599
598
600
if table not in unique_tables and table .find (string = match ) is not None :
599
601
result .append (table )
600
602
unique_tables .add (table )
601
-
602
603
if not result :
603
604
raise ValueError (f"No tables found matching pattern { repr (match .pattern )} " )
604
605
return result
@@ -725,15 +726,17 @@ def _parse_tables(self, doc, match, kwargs):
725
726
tables = doc .xpath (xpath_expr , namespaces = _re_namespace )
726
727
727
728
tables = self ._handle_hidden_tables (tables , "attrib" )
729
+ self .displayed_only = True
728
730
if self .displayed_only :
729
731
for table in tables :
730
732
# lxml utilizes XPATH 1.0 which does not have regex
731
733
# support. As a result, we find all elements with a style
732
734
# attribute and iterate them to check for display:none
735
+ for elem in table .xpath (".//style" ):
736
+ elem .drop_tree ()
733
737
for elem in table .xpath (".//*[@style]" ):
734
738
if "display:none" in elem .attrib .get ("style" , "" ).replace (" " , "" ):
735
739
elem .drop_tree ()
736
-
737
740
if not tables :
738
741
raise ValueError (f"No tables found matching regex { repr (pattern )} " )
739
742
return tables
@@ -1170,6 +1173,7 @@ def read_html(
1170
1173
'{None, "header", "footer", "body", "all"}, got '
1171
1174
f'"{ extract_links } "'
1172
1175
)
1176
+
1173
1177
validate_header_arg (header )
1174
1178
check_dtype_backend (dtype_backend )
1175
1179
0 commit comments