|
2 | 2 | # cython: boundscheck=False, initializedcheck=False
|
3 | 3 |
|
4 | 4 | import numpy as np
|
5 |
| -cimport numpy as cnp |
6 |
| -from numpy cimport uint8_t, uint16_t, int8_t, int64_t, ndarray |
7 | 5 | import sas_constants as const
|
8 | 6 |
|
| 7 | +ctypedef signed long long int64_t |
| 8 | +ctypedef unsigned char uint8_t |
| 9 | +ctypedef unsigned short uint16_t |
| 10 | + |
9 | 11 | # rle_decompress decompresses data using a Run Length Encoding
|
10 | 12 | # algorithm. It is partially documented here:
|
11 | 13 | #
|
12 | 14 | # https://cran.r-project.org/web/packages/sas7bdat/vignettes/sas7bdat.pdf
|
13 |
| -cdef ndarray[uint8_t, ndim=1] rle_decompress( |
14 |
| - int result_length, ndarray[uint8_t, ndim=1] inbuff): |
| 15 | +cdef const uint8_t[:] rle_decompress(int result_length, |
| 16 | + const uint8_t[:] inbuff): |
15 | 17 |
|
16 | 18 | cdef:
|
17 | 19 | uint8_t control_byte, x
|
18 |
| - uint8_t [:] result = np.zeros(result_length, np.uint8) |
| 20 | + uint8_t[:] result = np.zeros(result_length, np.uint8) |
19 | 21 | int rpos = 0, ipos = 0, length = len(inbuff)
|
20 | 22 | int i, nbytes, end_of_first_byte
|
21 | 23 |
|
@@ -115,14 +117,14 @@ cdef ndarray[uint8_t, ndim=1] rle_decompress(
|
115 | 117 | # rdc_decompress decompresses data using the Ross Data Compression algorithm:
|
116 | 118 | #
|
117 | 119 | # http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
|
118 |
| -cdef ndarray[uint8_t, ndim=1] rdc_decompress( |
119 |
| - int result_length, ndarray[uint8_t, ndim=1] inbuff): |
| 120 | +cdef const uint8_t[:] rdc_decompress(int result_length, |
| 121 | + const uint8_t[:] inbuff): |
120 | 122 |
|
121 | 123 | cdef:
|
122 | 124 | uint8_t cmd
|
123 | 125 | uint16_t ctrl_bits, ctrl_mask = 0, ofs, cnt
|
124 | 126 | int ipos = 0, rpos = 0, k
|
125 |
| - uint8_t [:] outbuff = np.zeros(result_length, dtype=np.uint8) |
| 127 | + uint8_t[:] outbuff = np.zeros(result_length, dtype=np.uint8) |
126 | 128 |
|
127 | 129 | ii = -1
|
128 | 130 |
|
@@ -230,8 +232,8 @@ cdef class Parser(object):
|
230 | 232 | int subheader_pointer_length
|
231 | 233 | int current_page_type
|
232 | 234 | bint is_little_endian
|
233 |
| - ndarray[uint8_t, ndim=1] (*decompress)( |
234 |
| - int result_length, ndarray[uint8_t, ndim=1] inbuff) |
| 235 | + const uint8_t[:] (*decompress)(int result_length, |
| 236 | + const uint8_t[:] inbuff) |
235 | 237 | object parser
|
236 | 238 |
|
237 | 239 | def __init__(self, object parser):
|
@@ -395,7 +397,7 @@ cdef class Parser(object):
|
395 | 397 | Py_ssize_t j
|
396 | 398 | int s, k, m, jb, js, current_row
|
397 | 399 | int64_t lngt, start, ct
|
398 |
| - ndarray[uint8_t, ndim=1] source |
| 400 | + const uint8_t[:] source |
399 | 401 | int64_t[:] column_types
|
400 | 402 | int64_t[:] lengths
|
401 | 403 | int64_t[:] offsets
|
@@ -434,8 +436,8 @@ cdef class Parser(object):
|
434 | 436 | jb += 1
|
435 | 437 | elif column_types[j] == column_type_string:
|
436 | 438 | # string
|
437 |
| - string_chunk[js, current_row] = source[start:( |
438 |
| - start + lngt)].tostring().rstrip() |
| 439 | + string_chunk[js, current_row] = np.array(source[start:( |
| 440 | + start + lngt)]).tostring().rstrip() |
439 | 441 | js += 1
|
440 | 442 |
|
441 | 443 | self.current_row_on_page_index += 1
|
|
0 commit comments