@@ -180,7 +180,7 @@ def __init__(
180
180
181
181
self .default_encoding = "latin-1"
182
182
self .compression = b""
183
- self .column_names_strings : list [str ] = []
183
+ self .column_names_raw : list [bytes ] = []
184
184
self .column_names : list [str ] = []
185
185
self .column_formats : list [str ] = []
186
186
self .columns : list [_Column ] = []
@@ -570,12 +570,9 @@ def _process_columntext_subheader(self, offset: int, length: int) -> None:
570
570
571
571
buf = self ._read_bytes (offset , text_block_size )
572
572
cname_raw = buf [0 :text_block_size ].rstrip (b"\x00 " )
573
- cname = cname_raw
574
- if self .convert_header_text :
575
- cname = cname .decode (self .encoding or self .default_encoding )
576
- self .column_names_strings .append (cname )
573
+ self .column_names_raw .append (cname_raw )
577
574
578
- if len (self .column_names_strings ) == 1 :
575
+ if len (self .column_names_raw ) == 1 :
579
576
compression_literal = b""
580
577
for cl in const .compression_literals :
581
578
if cl in cname_raw :
@@ -644,8 +641,11 @@ def _process_columnname_subheader(self, offset: int, length: int) -> None:
644
641
)
645
642
col_len = self ._read_int (col_name_length , const .column_name_length_length )
646
643
647
- name_str = self .column_names_strings [idx ]
648
- self .column_names .append (name_str [col_offset : col_offset + col_len ])
644
+ name_str = self .column_names_raw [idx ]
645
+ cname = name_str [col_offset : col_offset + col_len ]
646
+ if self .convert_header_text :
647
+ cname = cname .decode (self .encoding or self .default_encoding )
648
+ self .column_names .append (cname )
649
649
650
650
def _process_columnattributes_subheader (self , offset : int , length : int ) -> None :
651
651
int_len = self ._int_length
@@ -693,7 +693,7 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
693
693
x = self ._read_int (
694
694
text_subheader_format , const .column_format_text_subheader_index_length
695
695
)
696
- format_idx = min (x , len (self .column_names_strings ) - 1 )
696
+ format_idx = min (x , len (self .column_names_raw ) - 1 )
697
697
698
698
format_start = self ._read_int (
699
699
col_format_offset , const .column_format_offset_length
@@ -703,15 +703,19 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
703
703
label_idx = self ._read_int (
704
704
text_subheader_label , const .column_label_text_subheader_index_length
705
705
)
706
- label_idx = min (label_idx , len (self .column_names_strings ) - 1 )
706
+ label_idx = min (label_idx , len (self .column_names_raw ) - 1 )
707
707
708
708
label_start = self ._read_int (col_label_offset , const .column_label_offset_length )
709
709
label_len = self ._read_int (col_label_len , const .column_label_length_length )
710
710
711
- label_names = self .column_names_strings [label_idx ]
711
+ label_names = self .column_names_raw [label_idx ]
712
712
column_label = label_names [label_start : label_start + label_len ]
713
- format_names = self .column_names_strings [format_idx ]
713
+ if self .convert_header_text :
714
+ column_label = column_label .decode (self .encoding or self .default_encoding )
715
+ format_names = self .column_names_raw [format_idx ]
714
716
column_format = format_names [format_start : format_start + format_len ]
717
+ if self .convert_header_text :
718
+ column_format = column_format .decode (self .encoding or self .default_encoding )
715
719
current_column_number = len (self .columns )
716
720
717
721
col = _Column (
0 commit comments