|
1 | 1 | import numpy as np
|
2 | 2 | cimport numpy as np
|
3 |
| -from numpy cimport uint8_t, uint16_t |
| 3 | +from numpy cimport uint8_t, uint16_t, int8_t |
4 | 4 |
|
5 | 5 | # rle_decompress decompresses data using a Run Length Encoding
|
6 | 6 | # algorithm. It is partially documented here:
|
@@ -191,43 +191,44 @@ def _rdc_decompress(int result_length, np.ndarray[uint8_t, ndim=1] inbuff):
|
191 | 191 |
|
192 | 192 | return np.asarray(outbuff).tostring()
|
193 | 193 |
|
194 |
def process_byte_array_with_data(parser, int offset, int length, uint8_t[:, ::1] byte_chunk,
                                 object[:, ::1] string_chunk):
    """Unpack one row of raw SAS7BDAT bytes into the output chunk buffers.

    Numeric ('d') column bytes are copied into ``byte_chunk`` (8 bytes
    reserved per value); string ('s') column bytes are stored as ``bytes``
    objects in ``string_chunk``.

    Parameters
    ----------
    parser : object
        The SAS reader; supplies ``_cached_page``, per-column offsets /
        lengths / types, compression settings and the current row index.
    offset : int
        Byte offset of this row inside ``parser._cached_page``.
    length : int
        Number of stored bytes for this row at ``offset``.
    byte_chunk : uint8_t[:, ::1]
        Output buffer for numeric columns, one 8-byte slot per value.
    string_chunk : object[:, ::1]
        Output buffer for string columns.

    Raises
    ------
    ValueError
        If a column's type byte is neither b'd' (double) nor b's' (string).
    """
    cdef int s
    cdef int j
    cdef int k
    cdef int m
    cdef int start
    cdef int jb
    cdef int js
    cdef int lngt

    # Hoist per-column metadata into typed memoryviews so the loop below
    # avoids repeated Python attribute lookups and boxed indexing.
    # NOTE(review): `long[:]` assumes the parser stores these as C-long
    # arrays — confirm the dtype on platforms where long is 32-bit.
    cdef long[:] lengths = parser._column_data_lengths
    cdef long[:] offsets = parser._column_data_offsets
    cdef char[:] column_types = parser.column_types

    source = parser._cached_page[offset:offset + length]
    # A stored length shorter than the full row length means this row is
    # compressed; expand it to `row_length` bytes before slicing columns.
    if (parser.compression != "") and (length < parser.row_length):
        source = parser._decompress(parser.row_length, source)

    # Each numeric value occupies an 8-byte slot in byte_chunk.
    s = 8 * parser._current_row_in_chunk_index
    js = 0
    jb = 0
    for j in range(parser.column_count):
        lngt = lengths[j]
        if lngt == 0:
            # A zero column length marks the end of the column list.
            break
        start = offsets[j]
        if column_types[j] == b'd':
            # Doubles narrower than 8 bytes are right-aligned within the
            # slot for little-endian data (the significant trailing bytes
            # land at the high addresses), left-aligned for big-endian.
            if parser.byte_order == "<":
                m = s + 8 - lngt
            else:
                m = s
            for k in range(lngt):
                byte_chunk[jb, m + k] = source[start + k]
            jb += 1
        elif column_types[j] == b's':
            string_chunk[js, parser._current_row_in_chunk_index] = bytes(source[start:start + lngt])
            js += 1
        else:
            raise ValueError("unknown column type: %s" % parser.columns[j].ctype)
|
|
0 commit comments