@@ -3,20 +3,21 @@ cimport numpy as np
 from numpy cimport uint8_t, uint16_t, int8_t
 import sas_constants as const

-
 # rle_decompress decompresses data using a Run Length Encoding
 # algorithm. It is partially documented here:
 #
 # https://cran.r-project.org/web/packages/sas7bdat/vignettes/sas7bdat.pdf
-cdef np.ndarray[uint8_t, ndim=1] rle_decompress(int result_length, np.ndarray[uint8_t, ndim=1] inbuff):
+cdef rle_decompress(int result_length, np.ndarray[uint8_t, ndim=1] inbuff):

-    cdef:
-        uint8_t control_byte, x
-        np.ndarray[uint8_t, ndim=1] result = np.zeros(result_length, np.uint8)
-        int rpos = 0
-        int ipos = 0
-        int i, nbytes
-        length = len(inbuff)
+    cdef uint8_t control_byte
+    cdef uint8_t[:] result = np.zeros(result_length, np.uint8)
+
+    cdef int rpos = 0
+    cdef int ipos = 0
+    cdef int i
+    cdef int nbytes
+    cdef uint8_t x
+    cdef length = len(inbuff)

     while ipos < length:
         control_byte = inbuff[ipos] & 0xF0
@@ -105,22 +106,24 @@ cdef np.ndarray[uint8_t, ndim=1] rle_decompress(int result_length, np.ndarray[ui
     if len(result) != result_length:
         print("RLE: %v != %v\n", (len(result), result_length))

-    return np.asarray(result, dtype=np.uint8)
+    return np.asarray(result).tostring()


 # rdc_decompress decompresses data using the Ross Data Compression algorithm:
 #
 # http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
-cdef np.ndarray[uint8_t, ndim=1] rdc_decompress(int result_length, np.ndarray[uint8_t, ndim=1] inbuff):
+cdef rdc_decompress(int result_length, np.ndarray[uint8_t, ndim=1] inbuff):

-    cdef:
-        uint8_t cmd, ofs, cnt
-        uint16_t ctrl_bits
-        uint16_t ctrl_mask = 0
-        int ipos = 0
-        int rpos = 0
-        int k
-        np.ndarray[uint8_t, ndim=1] outbuff = np.zeros(result_length, dtype=np.uint8)
+    cdef uint8_t cmd
+    cdef uint16_t ctrl_bits
+    cdef uint16_t ctrl_mask = 0
+    cdef uint16_t ofs
+    cdef uint16_t cnt
+    cdef int ipos = 0
+    cdef int rpos = 0
+    cdef int k
+
+    cdef uint8_t[:] outbuff = np.zeros(result_length, dtype=np.uint8)

     ii = -1

@@ -187,10 +190,9 @@ cdef np.ndarray[uint8_t, ndim=1] rdc_decompress(int result_length, np.ndarray[ui
     if len(outbuff) != result_length:
         raise ValueError("RDC: %v != %v\n", len(outbuff), result_length)

-    return np.asarray(outbuff, dtype=np.uint8)
-
+    return np.asarray(outbuff).tostring()

-cdef np.ndarray[uint8_t, ndim=1] decompress(object parser, int row_length, uint8_t[:] page):
+cdef decompress(object parser, int row_length, page):
     page = np.frombuffer(page, dtype=np.uint8)
     if parser.compression == const.rle_compression:
         return rle_decompress(row_length, page)
@@ -210,7 +212,7 @@ def do_read(object parser, int nrows):
             break


-cdef bint readline(object parser):
+cdef readline(object parser):

     cdef:
         int offset, bit_offset, align_correction, subheader_pointer_length
@@ -281,17 +283,17 @@ cdef bint readline(object parser):
                              parser._current_page_type)


-cdef void process_byte_array_with_data(object parser, int offset, int length):
+cdef process_byte_array_with_data(object parser, int offset, int length):

     cdef:
         int s, j, k, m, start, jb, js, lngt
         long[:] lengths = parser._column_data_lengths
         long[:] offsets = parser._column_data_offsets
         char[:] column_types = parser.column_types
         uint8_t[:, :] byte_chunk = parser._byte_chunk
-        # object[:, :] string_chunk = parser._string_chunk
+        object[:, :] string_chunk = parser._string_chunk

-    source = np.frombuffer(parser._cached_page[offset:offset+length], dtype=np.uint8)
+    source = parser._cached_page[offset:offset+length]
     if (parser.compression != "") and (length < parser.row_length):
         source = decompress(parser, parser.row_length, source)

@@ -312,7 +314,7 @@ cdef void process_byte_array_with_data(object parser, int offset, int length):
                 byte_chunk[jb, m + k] = source[start + k]
             jb += 1
         elif column_types[j] == b's':
-            parser._string_chunk[js][parser._current_row_in_chunk_index] = source[start:(start+lngt)].tostring().rstrip()
+            string_chunk[js, parser._current_row_in_chunk_index] = source[start:(start+lngt)].rstrip()
             js += 1
         else:
             raise ValueError("unknown column type: %s" % parser.columns[j].ctype)
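As background for the first two hunks: rle_decompress walks the compressed buffer one control byte at a time, with the high nibble selecting a command and the low bits contributing a count, per the sas7bdat vignette linked in the code. The plain-Python sketch below shows only the general shape of such a decoder; the two commands it implements (0x00 = copy n literal bytes, 0x10 = repeat one byte n times) are invented for illustration and are not the actual SAS control codes.

import numpy as np

def toy_rle_decompress(result_length, inbuff):
    # Generic run-length decoder sketch -- NOT the SAS scheme, just the same
    # control-byte loop structure used by rle_decompress in the diff above.
    result = np.zeros(result_length, np.uint8)
    ipos = 0   # read position in the compressed buffer
    rpos = 0   # write position in the output buffer
    length = len(inbuff)
    while ipos < length:
        control_byte = inbuff[ipos] & 0xF0
        nbytes = inbuff[ipos] & 0x0F
        ipos += 1
        if control_byte == 0x00:    # hypothetical command: copy nbytes literal bytes
            for i in range(nbytes):
                result[rpos] = inbuff[ipos + i]
                rpos += 1
            ipos += nbytes
        elif control_byte == 0x10:  # hypothetical command: repeat next byte nbytes times
            x = inbuff[ipos]
            ipos += 1
            for i in range(nbytes):
                result[rpos] = x
                rpos += 1
    # The Cython version returns the same bytes via the older .tostring() alias.
    return result.tobytes()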
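A second recurring change is the return value of the decompressors: instead of a typed uint8 ndarray, rle_decompress and rdc_decompress now return a Python byte string built from the uint8 memoryview, and process_byte_array_with_data takes source directly as a slice of parser._cached_page. Assuming _cached_page is a byte string, the compressed and uncompressed paths then hand the string-column branch the same kind of object, which is why the per-field .tostring() call could be dropped. A minimal illustration of that equivalence (the names here are placeholders, not the parser's attributes):

import numpy as np

# Uncompressed path: slicing a cached byte string already yields bytes.
cached_page = b"ABC   \x00\x01\x02"
source_raw = cached_page[0:6]                      # b"ABC   "

# Compressed path: a uint8 buffer converted back to bytes; the diff uses the
# older .tostring() alias of .tobytes() for the same conversion.
buf = np.frombuffer(b"ABC   ", dtype=np.uint8)
source_decompressed = np.asarray(buf).tobytes()    # b"ABC   "

# Either way the string-column branch can slice and strip trailing blanks
# without an extra per-field conversion.
assert source_raw.rstrip() == source_decompressed[0:6].rstrip() == b"ABC"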