Skip to content

Commit 888829c

Browse files
committed
Fix reading some RDC compressed pages
1 parent c9ce063 commit 888829c

File tree

4 files changed

+24
-20
lines changed

4 files changed

+24
-20
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,7 @@ I/O
814814
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
815815
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
816816
- :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``.
817+
- Bug in :func:`read_sas` where some pages of RDC-compressed SAS7BDAT files were not read correctly (:issue:`35545`)
817818
-
818819

819820
Period

pandas/io/sas/sas.pyx

+3-5
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,7 @@ cdef enum ColumnTypes:
203203
# C-typed (int) copies of the page-type constants and related offsets from ``const``
204204
cdef:
205205
int page_meta_type = const.page_meta_type
206-
int page_mix_types_0 = const.page_mix_types[0]
207-
int page_mix_types_1 = const.page_mix_types[1]
206+
int page_mix_type = const.page_mix_type
208207
int page_data_type = const.page_data_type
209208
int subheader_pointers_offset = const.subheader_pointers_offset
210209

@@ -350,8 +349,7 @@ cdef class Parser:
350349
current_subheader_pointer.offset,
351350
current_subheader_pointer.length)
352351
return False
353-
elif (self.current_page_type == page_mix_types_0 or
354-
self.current_page_type == page_mix_types_1):
352+
elif self.current_page_type == page_mix_type:
355353
align_correction = (
356354
bit_offset
357355
+ subheader_pointers_offset
@@ -369,7 +367,7 @@ cdef class Parser:
369367
if done:
370368
return True
371369
return False
372-
elif self.current_page_type & page_data_type == page_data_type:
370+
elif self.current_page_type == page_data_type:
373371
self.process_byte_array_with_data(
374372
bit_offset
375373
+ subheader_pointers_offset

pandas/io/sas/sas7bdat.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -410,11 +410,11 @@ def _parse_metadata(self) -> None:
410410

411411
def _process_page_meta(self) -> bool:
412412
self._read_page_header()
413-
pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
413+
pt = [const.page_meta_type, const.page_amd_type, const.page_mix_type]
414414
if self._current_page_type in pt:
415415
self._process_page_metadata()
416-
is_data_page = self._current_page_type & const.page_data_type
417-
is_mix_page = self._current_page_type in const.page_mix_types
416+
is_data_page = self._current_page_type == const.page_data_type
417+
is_mix_page = self._current_page_type == const.page_mix_type
418418
return bool(
419419
is_data_page
420420
or is_mix_page
@@ -424,7 +424,9 @@ def _process_page_meta(self) -> bool:
424424
def _read_page_header(self):
425425
bit_offset = self._page_bit_offset
426426
tx = const.page_type_offset + bit_offset
427-
self._current_page_type = self._read_int(tx, const.page_type_length)
427+
self._current_page_type = (
428+
self._read_int(tx, const.page_type_length) & const.page_type_mask2
429+
)
428430
tx = const.block_count_offset + bit_offset
429431
self._current_page_block_count = self._read_int(tx, const.block_count_length)
430432
tx = const.subheader_count_offset + bit_offset
@@ -774,13 +776,14 @@ def _read_next_page(self):
774776
raise ValueError(msg)
775777

776778
self._read_page_header()
777-
page_type = self._current_page_type
778-
if page_type == const.page_meta_type:
779+
if self._current_page_type == const.page_meta_type:
779780
self._process_page_metadata()
780781

781-
is_data_page = page_type & const.page_data_type
782-
pt = [const.page_meta_type] + const.page_mix_types
783-
if not is_data_page and self._current_page_type not in pt:
782+
if self._current_page_type not in [
783+
const.page_data_type,
784+
const.page_meta_type,
785+
const.page_mix_type,
786+
]:
784787
return self._read_next_page()
785788

786789
return False

pandas/io/sas/sas_constants.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,14 @@
5353
block_count_length = 2
5454
subheader_count_offset = 4
5555
subheader_count_length = 2
56-
page_meta_type = 0
57-
page_data_type = 256
58-
page_amd_type = 1024
59-
page_metc_type = 16384
60-
page_comp_type = -28672
61-
page_mix_types = [512, 640]
56+
page_type_mask = 0x0F00
57+
# Like page_type_mask, but also keeps the high nibble (0xF000) that holds the "page_comp_type" bits
58+
page_type_mask2 = 0xF000 | page_type_mask
59+
page_meta_type = 0x0000
60+
page_data_type = 0x0100
61+
page_mix_type = 0x0200
62+
page_amd_type = 0x0400
63+
page_comp_type = 0x9000
6264
subheader_pointers_offset = 8
6365
truncated_subheader_id = 1
6466
compressed_subheader_id = 4

0 commit comments

Comments
 (0)