@@ -180,9 +180,9 @@ def __init__(
180
180
181
181
self .default_encoding = "latin-1"
182
182
self .compression = b""
183
- self .column_names_strings : list [str ] = []
184
- self .column_names : list [str ] = []
185
- self .column_formats : list [str ] = []
183
+ self .column_names_raw : list [bytes ] = []
184
+ self .column_names : list [str | bytes ] = []
185
+ self .column_formats : list [str | bytes ] = []
186
186
self .columns : list [_Column ] = []
187
187
188
188
self ._current_page_data_subheader_pointers : list [_SubheaderPointer ] = []
@@ -570,12 +570,9 @@ def _process_columntext_subheader(self, offset: int, length: int) -> None:
570
570
571
571
buf = self ._read_bytes (offset , text_block_size )
572
572
cname_raw = buf [0 :text_block_size ].rstrip (b"\x00 " )
573
- cname = cname_raw
574
- if self .convert_header_text :
575
- cname = cname .decode (self .encoding or self .default_encoding )
576
- self .column_names_strings .append (cname )
573
+ self .column_names_raw .append (cname_raw )
577
574
578
- if len (self .column_names_strings ) == 1 :
575
+ if len (self .column_names_raw ) == 1 :
579
576
compression_literal = b""
580
577
for cl in const .compression_literals :
581
578
if cl in cname_raw :
@@ -644,8 +641,14 @@ def _process_columnname_subheader(self, offset: int, length: int) -> None:
644
641
)
645
642
col_len = self ._read_int (col_name_length , const .column_name_length_length )
646
643
647
- name_str = self .column_names_strings [idx ]
648
- self .column_names .append (name_str [col_offset : col_offset + col_len ])
644
+ name_raw = self .column_names_raw [idx ]
645
+ cname = name_raw [col_offset : col_offset + col_len ]
646
+ if self .convert_header_text :
647
+ self .column_names .append (
648
+ cname .decode (self .encoding or self .default_encoding )
649
+ )
650
+ else :
651
+ self .column_names .append (cname )
649
652
650
653
def _process_columnattributes_subheader (self , offset : int , length : int ) -> None :
651
654
int_len = self ._int_length
@@ -693,7 +696,7 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
693
696
x = self ._read_int (
694
697
text_subheader_format , const .column_format_text_subheader_index_length
695
698
)
696
- format_idx = min (x , len (self .column_names_strings ) - 1 )
699
+ format_idx = min (x , len (self .column_names_raw ) - 1 )
697
700
698
701
format_start = self ._read_int (
699
702
col_format_offset , const .column_format_offset_length
@@ -703,15 +706,29 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
703
706
label_idx = self ._read_int (
704
707
text_subheader_label , const .column_label_text_subheader_index_length
705
708
)
706
- label_idx = min (label_idx , len (self .column_names_strings ) - 1 )
709
+ label_idx = min (label_idx , len (self .column_names_raw ) - 1 )
707
710
708
711
label_start = self ._read_int (col_label_offset , const .column_label_offset_length )
709
712
label_len = self ._read_int (col_label_len , const .column_label_length_length )
710
713
711
- label_names = self .column_names_strings [label_idx ]
712
- column_label = label_names [label_start : label_start + label_len ]
713
- format_names = self .column_names_strings [format_idx ]
714
- column_format = format_names [format_start : format_start + format_len ]
714
+ label_names = self .column_names_raw [label_idx ]
715
+ column_label_bytes = label_names [label_start : label_start + label_len ]
716
+ column_label : str | bytes
717
+ if self .convert_header_text :
718
+ column_label = column_label_bytes .decode (
719
+ self .encoding or self .default_encoding
720
+ )
721
+ else :
722
+ column_label = column_label_bytes
723
+ format_names = self .column_names_raw [format_idx ]
724
+ column_format_bytes = format_names [format_start : format_start + format_len ]
725
+ column_format : str | bytes
726
+ if self .convert_header_text :
727
+ column_format = column_format_bytes .decode (
728
+ self .encoding or self .default_encoding
729
+ )
730
+ else :
731
+ column_format = column_format_bytes
715
732
current_column_number = len (self .columns )
716
733
717
734
col = _Column (
0 commit comments