Skip to content

Commit 066a6c9

Browse files
jonashaag authored and yehoshuadimarsky committed
Properly propagate exceptions in sas.pyx (pandas-dev#47149)
1 parent 607229a commit 066a6c9

File tree

2 files changed: +41 -11 lines changed

pandas/io/sas/sas.pyx

+8 -11
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ ctypedef unsigned short uint16_t
1313
# algorithm. It is partially documented here:
1414
#
1515
# https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf
16-
cdef const uint8_t[:] rle_decompress(int result_length, const uint8_t[:] inbuff):
16+
cdef const uint8_t[:] rle_decompress(int result_length, const uint8_t[:] inbuff) except *:
1717

1818
cdef:
1919
uint8_t control_byte, x
@@ -116,7 +116,7 @@ cdef const uint8_t[:] rle_decompress(int result_length, const uint8_t[:] inbuff)
116116
# rdc_decompress decompresses data using the Ross Data Compression algorithm:
117117
#
118118
# http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
119-
cdef const uint8_t[:] rdc_decompress(int result_length, const uint8_t[:] inbuff):
119+
cdef const uint8_t[:] rdc_decompress(int result_length, const uint8_t[:] inbuff) except *:
120120

121121
cdef:
122122
uint8_t cmd
@@ -177,17 +177,14 @@ cdef const uint8_t[:] rdc_decompress(int result_length, const uint8_t[:] inbuff)
177177
rpos += cnt
178178

179179
# short pattern
180-
elif (cmd >= 3) & (cmd <= 15):
180+
else:
181181
ofs = cnt + 3
182182
ofs += <uint16_t>inbuff[ipos] << 4
183183
ipos += 1
184184
for k in range(cmd):
185185
outbuff[rpos + k] = outbuff[rpos - <int>ofs + k]
186186
rpos += cmd
187187

188-
else:
189-
raise ValueError("unknown RDC command")
190-
191188
# In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t
192189
if <Py_ssize_t>len(outbuff) != <Py_ssize_t>result_length:
193190
raise ValueError(f"RDC: {len(outbuff)} != {result_length}\n")
@@ -231,7 +228,7 @@ cdef class Parser:
231228
int subheader_pointer_length
232229
int current_page_type
233230
bint is_little_endian
234-
const uint8_t[:] (*decompress)(int result_length, const uint8_t[:] inbuff)
231+
const uint8_t[:] (*decompress)(int result_length, const uint8_t[:] inbuff) except *
235232
object parser
236233

237234
def __init__(self, object parser):
@@ -294,8 +291,8 @@ cdef class Parser:
294291
self.parser._current_row_in_chunk_index = self.current_row_in_chunk_index
295292
self.parser._current_row_in_file_index = self.current_row_in_file_index
296293

297-
cdef bint read_next_page(self):
298-
cdef done
294+
cdef bint read_next_page(self) except? True:
295+
cdef bint done
299296

300297
done = self.parser._read_next_page()
301298
if done:
@@ -316,7 +313,7 @@ cdef class Parser:
316313
)
317314
self.current_page_subheaders_count = self.parser._current_page_subheaders_count
318315

319-
cdef readline(self):
316+
cdef bint readline(self) except? True:
320317

321318
cdef:
322319
int offset, bit_offset, align_correction
@@ -385,7 +382,7 @@ cdef class Parser:
385382
else:
386383
raise ValueError(f"unknown page type: {self.current_page_type}")
387384

388-
cdef void process_byte_array_with_data(self, int offset, int length):
385+
cdef void process_byte_array_with_data(self, int offset, int length) except *:
389386

390387
cdef:
391388
Py_ssize_t j

pandas/tests/io/sas/test_sas7bdat.py

+33
Original file line number | Diff line number | Diff line change
@@ -341,3 +341,36 @@ def test_null_date(datapath):
341341
},
342342
)
343343
tm.assert_frame_equal(df, expected)
344+
345+
346+
@pytest.mark.parametrize("test_file", ["test2.sas7bdat", "test3.sas7bdat"])
347+
def test_exception_propagation_rdc_rle_decompress(datapath, monkeypatch, test_file):
348+
"""Errors in RLE/RDC decompression should propagate the same error."""
349+
orig_np_zeros = np.zeros
350+
351+
def _patched_zeros(size, dtype):
352+
if isinstance(size, int):
353+
# np.zeros() call in {rdc,rle}_decompress
354+
raise Exception("Test exception")
355+
else:
356+
# Other calls to np.zeros
357+
return orig_np_zeros(size, dtype)
358+
359+
monkeypatch.setattr(np, "zeros", _patched_zeros)
360+
361+
with pytest.raises(Exception, match="^Test exception$"):
362+
pd.read_sas(datapath("io", "sas", "data", test_file))
363+
364+
365+
def test_exception_propagation_rle_decompress(tmp_path, datapath):
366+
"""Illegal control byte in RLE decompressor should raise the correct ValueError."""
367+
with open(datapath("io", "sas", "data", "test2.sas7bdat"), "rb") as f:
368+
data = bytearray(f.read())
369+
invalid_control_byte = 0x10
370+
page_offset = 0x10000
371+
control_byte_pos = 55229
372+
data[page_offset + control_byte_pos] = invalid_control_byte
373+
tmp_file = tmp_path / "test2.sas7bdat"
374+
tmp_file.write_bytes(data)
375+
with pytest.raises(ValueError, match="unknown control byte"):
376+
pd.read_sas(tmp_file)

0 commit comments

Comments
 (0)