@@ -454,15 +454,26 @@ def row_is_all_th(row):
454
454
while body_rows and row_is_all_th (body_rows [0 ]):
455
455
header_rows .append (body_rows .pop (0 ))
456
456
457
- header = self ._expand_colspan_rowspan (header_rows , section = "header" )
458
- body = self ._expand_colspan_rowspan (body_rows , section = "body" )
459
- footer = self ._expand_colspan_rowspan (footer_rows , section = "footer" )
457
+ header , rem = self ._expand_colspan_rowspan (header_rows , section = "header" )
458
+ body , rem = self ._expand_colspan_rowspan (
459
+ body_rows ,
460
+ section = "body" ,
461
+ remainder = rem ,
462
+ overflow = len (footer_rows ) > 0 ,
463
+ )
464
+ footer , _ = self ._expand_colspan_rowspan (
465
+ footer_rows , section = "footer" , remainder = rem , overflow = False
466
+ )
460
467
461
468
return header , body , footer
462
469
463
470
def _expand_colspan_rowspan (
464
- self , rows , section : Literal ["header" , "footer" , "body" ]
465
- ) -> list [list ]:
471
+ self ,
472
+ rows ,
473
+ section : Literal ["header" , "footer" , "body" ],
474
+ remainder : list [tuple [int , str | tuple , int ]] | None = None ,
475
+ overflow : bool = True ,
476
+ ) -> tuple [list [list ], list [tuple [int , str | tuple , int ]]]:
466
477
"""
467
478
Given a list of <tr>s, return a list of text rows.
468
479
@@ -471,12 +482,20 @@ def _expand_colspan_rowspan(
471
482
rows : list of node-like
472
483
List of <tr>s
473
484
section : the section that the rows belong to (header, body or footer).
485
+ remainder: list[tuple[int, str | tuple, int]] | None
486
+ Pending rowspan cells, as (index, text, nrows) tuples, carried over from the expansion of the previous section. None means nothing is carried over.
487
+ overflow: bool
488
+ If True, return any unfinished rowspans as 'remainder'. If False, append
489
+ extra rows until every pending rowspan is used up. True by default.
474
490
475
491
Returns
476
492
-------
477
493
list of list
478
494
Each returned row is a list of str text, or tuple (text, link)
479
495
if extract_links is not None.
496
+ remainder
497
+ Remaining partial rows if any. If overflow is False, an empty list
498
+ is returned.
480
499
481
500
Notes
482
501
-----
@@ -485,9 +504,7 @@ def _expand_colspan_rowspan(
485
504
"""
486
505
all_texts = [] # list of rows, each a list of str
487
506
text : str | tuple
488
- remainder : list [
489
- tuple [int , str | tuple , int ]
490
- ] = [] # list of (index, text, nrows)
507
+ remainder = remainder if remainder is not None else []
491
508
492
509
for tr in rows :
493
510
texts = [] # the output for this row
@@ -528,19 +545,20 @@ def _expand_colspan_rowspan(
528
545
all_texts .append (texts )
529
546
remainder = next_remainder
530
547
531
- # Append rows that only appear because the previous row had non-1
532
- # rowspan
533
- while remainder :
534
- next_remainder = []
535
- texts = []
536
- for prev_i , prev_text , prev_rowspan in remainder :
537
- texts .append (prev_text )
538
- if prev_rowspan > 1 :
539
- next_remainder .append ((prev_i , prev_text , prev_rowspan - 1 ))
540
- all_texts .append (texts )
541
- remainder = next_remainder
548
+ if not overflow :
549
+ # Append rows that only appear because the previous row had non-1
550
+ # rowspan
551
+ while remainder :
552
+ next_remainder = []
553
+ texts = []
554
+ for prev_i , prev_text , prev_rowspan in remainder :
555
+ texts .append (prev_text )
556
+ if prev_rowspan > 1 :
557
+ next_remainder .append ((prev_i , prev_text , prev_rowspan - 1 ))
558
+ all_texts .append (texts )
559
+ remainder = next_remainder
542
560
543
- return all_texts
561
+ return all_texts , remainder
544
562
545
563
def _handle_hidden_tables (self , tbl_list , attr_name : str ):
546
564
"""
0 commit comments