@@ -73,6 +73,7 @@ def __init__(self, path_or_buf, index=None, convert_dates=True,
73
73
self .convert_text = convert_text
74
74
self .convert_header_text = convert_header_text
75
75
76
+ self .default_encoding = "latin-1"
76
77
self .compression = ""
77
78
self .column_names_strings = []
78
79
self .column_names = []
@@ -149,12 +150,12 @@ def _get_properties(self):
149
150
buf = self ._read_bytes (const .dataset_offset , const .dataset_length )
150
151
self .name = buf .rstrip (b'\x00 ' )
151
152
if self .convert_header_text :
152
- self .name = self .name .decode (self .encoding )
153
+ self .name = self .name .decode (self .encoding or self . default_encoding )
153
154
154
155
buf = self ._read_bytes (const .file_type_offset , const .file_type_length )
155
156
self .file_type = buf .rstrip (b'\x00 ' )
156
157
if self .convert_header_text :
157
- self .file_type = self .file_type .decode (self .encoding )
158
+ self .file_type = self .file_type .decode (self .encoding or self . default_encoding )
158
159
159
160
# Timestamp is epoch 01/01/1960
160
161
epoch = pd .datetime (1960 , 1 , 1 )
@@ -183,31 +184,31 @@ def _get_properties(self):
183
184
const .sas_release_length )
184
185
self .sas_release = buf .rstrip (b'\x00 ' )
185
186
if self .convert_header_text :
186
- self .sas_release = self .sas_release .decode (self .encoding )
187
+ self .sas_release = self .sas_release .decode (self .encoding or self . default_encoding )
187
188
188
189
buf = self ._read_bytes (const .sas_server_type_offset + total_align ,
189
190
const .sas_server_type_length )
190
191
self .server_type = buf .rstrip (b'\x00 ' )
191
192
if self .convert_header_text :
192
- self .server_type = self .server_type .decode (self .encoding )
193
+ self .server_type = self .server_type .decode (self .encoding or self . default_encoding )
193
194
194
195
buf = self ._read_bytes (const .os_version_number_offset + total_align ,
195
196
const .os_version_number_length )
196
197
self .os_version = buf .rstrip (b'\x00 ' )
197
198
if self .convert_header_text :
198
- self .os_version = self .os_version .decode (self .encoding )
199
+ self .os_version = self .os_version .decode (self .encoding or self . default_encoding )
199
200
200
201
buf = self ._read_bytes (const .os_name_offset + total_align ,
201
202
const .os_name_length )
202
203
buf = buf .rstrip (b'\x00 ' )
203
204
if len (buf ) > 0 :
204
- self .os_name = buf .decode (self .encoding )
205
+ self .os_name = buf .decode (self .encoding or self . default_encoding )
205
206
else :
206
207
buf = self ._read_bytes (const .os_maker_offset + total_align ,
207
208
const .os_maker_length )
208
209
self .os_name = buf .rstrip (b'\x00 ' )
209
210
if self .convert_header_text :
210
- self .os_name = self .os_name .decode (self .encoding )
211
+ self .os_name = self .os_name .decode (self .encoding or self . default_encoding )
211
212
212
213
# Read a single float of the given width (4 or 8).
213
214
def _read_float (self , offset , width ):
@@ -401,14 +402,14 @@ def _process_columntext_subheader(self, offset, length):
401
402
buf = self ._read_bytes (offset , text_block_size )
402
403
cname = buf [0 :text_block_size ].rstrip (b"\x00 " )
403
404
if self .convert_header_text :
404
- cname = cname .decode (self .encoding )
405
+ cname = cname .decode (self .encoding or self . default_encoding )
405
406
self .column_names_strings .append (cname )
406
407
407
408
if len (self .column_names_strings ) == 1 :
408
409
column_name = self .column_names_strings [0 ]
409
410
compression_literal = ""
410
411
for cl in const .compression_literals :
411
- if cl in column_name :
412
+ if cl in str ( column_name ) :
412
413
compression_literal = cl
413
414
self .compression = compression_literal
414
415
offset -= self ._int_length
@@ -425,20 +426,23 @@ def _process_columntext_subheader(self, offset, length):
425
426
if self .U64 :
426
427
offset1 += 4
427
428
buf = self ._read_bytes (offset1 , self ._lcp )
428
- self .creator_proc = buf [0 :self ._lcp ]. decode ()
429
+ self .creator_proc = buf [0 :self ._lcp ]
429
430
elif compression_literal == const .rle_compression :
430
431
offset1 = offset + 40
431
432
if self .U64 :
432
433
offset1 += 4
433
434
buf = self ._read_bytes (offset1 , self ._lcp )
434
- self .creator_proc = buf [0 :self ._lcp ]. decode ()
435
+ self .creator_proc = buf [0 :self ._lcp ]
435
436
elif self ._lcs > 0 :
436
437
self ._lcp = 0
437
438
offset1 = offset + 16
438
439
if self .U64 :
439
440
offset1 += 4
440
441
buf = self ._read_bytes (offset1 , self ._lcs )
441
- self .creator_proc = buf [0 :self ._lcp ].decode ()
442
+ self .creator_proc = buf [0 :self ._lcp ]
443
+ if self .convert_header_text :
444
+ if hasattr (self , "creator_proc" ):
445
+ self .creator_proc = self .creator_proc .decode (self .encoding or self .default_encoding )
442
446
443
447
def _process_columnname_subheader (self , offset , length ):
444
448
int_len = self ._int_length
@@ -624,7 +628,7 @@ def _chunk_to_dataframe(self):
624
628
elif self .column_types [j ] == b's' :
625
629
rslt [name ] = self ._string_chunk [js , :]
626
630
if self .convert_text and (self .encoding is not None ):
627
- rslt [name ] = rslt [name ].str .decode (self .encoding )
631
+ rslt [name ] = rslt [name ].str .decode (self .encoding or self . default_encoding )
628
632
if self .blank_missing :
629
633
ii = rslt [name ].str .len () == 0
630
634
rslt .loc [ii , name ] = np .nan
0 commit comments