@@ -121,30 +121,30 @@ cdef extern from "parser/tokenizer.h":
121
121
io_callback cb_io
122
122
io_cleanup cb_cleanup
123
123
124
- size_t chunksize # Number of bytes to prepare for each chunk
125
- char * data # pointer to data to be processed
126
- size_t datalen # amount of data available
127
- size_t datapos
124
+ int64_t chunksize # Number of bytes to prepare for each chunk
125
+ char * data # pointer to data to be processed
126
+ int64_t datalen # amount of data available
127
+ int64_t datapos
128
128
129
129
# where to write out tokenized data
130
130
char * stream
131
- size_t stream_len
132
- size_t stream_cap
131
+ int64_t stream_len
132
+ int64_t stream_cap
133
133
134
134
# Store words in (potentially ragged) matrix for now, hmm
135
135
char ** words
136
- size_t * word_starts # where we are in the stream
137
- size_t words_len
138
- size_t words_cap
136
+ int64_t * word_starts # where we are in the stream
137
+ int64_t words_len
138
+ int64_t words_cap
139
139
140
- char * pword_start # pointer to stream start of current field
141
- size_t word_start # position start of current field
140
+ char * pword_start # pointer to stream start of current field
141
+ int64_t word_start # position start of current field
142
142
143
- size_t * line_start # position in words for start of line
144
- size_t * line_fields # Number of fields in each line
145
- size_t lines # Number of lines observed
146
- size_t file_lines # Number of file lines observed (with bad/skipped)
147
- size_t lines_cap # Vector capacity
143
+ int64_t * line_start # position in words for start of line
144
+ int64_t * line_fields # Number of fields in each line
145
+ int64_t lines # Number of lines observed
146
+ int64_t file_lines # Number of file lines observed (with bad/skipped)
147
+ int64_t lines_cap # Vector capacity
148
148
149
149
# Tokenizing stuff
150
150
ParserState state
@@ -178,13 +178,13 @@ cdef extern from "parser/tokenizer.h":
178
178
char thousands
179
179
180
180
int header # Boolean: 1: has header, 0: no header
181
- ssize_t header_start # header row start
182
- ssize_t header_end # header row end
181
+ int64_t header_start # header row start
182
+ int64_t header_end # header row end
183
183
184
184
void * skipset
185
185
PyObject * skipfunc
186
186
int64_t skip_first_N_rows
187
- size_t skipfooter
187
+ int64_t skipfooter
188
188
# pick one, depending on whether the converter requires GIL
189
189
double (* double_converter_nogil)(const char * , char ** ,
190
190
char , char , char , int ) nogil
@@ -195,12 +195,12 @@ cdef extern from "parser/tokenizer.h":
195
195
char * warn_msg
196
196
char * error_msg
197
197
198
- size_t skip_empty_lines
198
+ int64_t skip_empty_lines
199
199
200
200
ctypedef struct coliter_t:
201
201
char ** words
202
- size_t * line_start
203
- size_t col
202
+ int64_t * line_start
203
+ int64_t col
204
204
205
205
ctypedef struct uint_state:
206
206
int seen_sint
@@ -210,7 +210,7 @@ cdef extern from "parser/tokenizer.h":
210
210
void uint_state_init(uint_state * self )
211
211
int uint64_conflict(uint_state * self )
212
212
213
- void coliter_setup(coliter_t * it, parser_t * parser, size_t i, size_t start) nogil
213
+ void coliter_setup(coliter_t * it, parser_t * parser, int64_t i, int64_t start) nogil
214
214
void COLITER_NEXT(coliter_t, const char * ) nogil
215
215
216
216
parser_t* parser_new()
@@ -289,14 +289,14 @@ cdef class TextReader:
289
289
object true_values, false_values
290
290
object handle
291
291
bint na_filter, verbose, has_usecols, has_mi_columns
292
- size_t parser_start
292
+ int64_t parser_start
293
293
list clocks
294
294
char * c_encoding
295
295
kh_str_t * false_set
296
296
kh_str_t * true_set
297
297
298
298
cdef public:
299
- size_t leading_cols, table_width, skipfooter, buffer_lines
299
+ int64_t leading_cols, table_width, skipfooter, buffer_lines
300
300
object allow_leading_cols
301
301
object delimiter, converters, delim_whitespace
302
302
object na_values
@@ -731,7 +731,7 @@ cdef class TextReader:
731
731
char * word
732
732
object name
733
733
int status
734
- size_t hr, data_line
734
+ int64_t hr, data_line
735
735
char * errors = " strict"
736
736
cdef StringPath path = _string_path(self .c_encoding)
737
737
@@ -950,8 +950,8 @@ cdef class TextReader:
950
950
951
951
cdef _read_rows(self , rows, bint trim):
952
952
cdef:
953
- size_t buffered_lines
954
- size_t irows, footer = 0
953
+ int64_t buffered_lines
954
+ int64_t irows, footer = 0
955
955
956
956
self ._start_clock()
957
957
@@ -1019,13 +1019,13 @@ cdef class TextReader:
1019
1019
1020
1020
def _convert_column_data (self , rows = None , upcast_na = False , footer = 0 ):
1021
1021
cdef:
1022
- size_t i
1022
+ int64_t i
1023
1023
int nused
1024
1024
kh_str_t * na_hashset = NULL
1025
- size_t start, end
1025
+ int64_t start, end
1026
1026
object name, na_flist, col_dtype = None
1027
1027
bint na_filter = 0
1028
- size_t num_cols
1028
+ int64_t num_cols
1029
1029
1030
1030
start = self .parser_start
1031
1031
@@ -1038,7 +1038,7 @@ cdef class TextReader:
1038
1038
# if footer > 0:
1039
1039
# end -= footer
1040
1040
1041
- num_cols = 0
1041
+ num_cols = - 1
1042
1042
for i in range (self .parser.lines):
1043
1043
num_cols = (num_cols < self .parser.line_fields[i]) * \
1044
1044
self .parser.line_fields[i] + \
@@ -1197,7 +1197,7 @@ cdef class TextReader:
1197
1197
return col_res, na_count
1198
1198
1199
1199
cdef _convert_with_dtype(self , object dtype, Py_ssize_t i,
1200
- size_t start, size_t end,
1200
+ int64_t start, int64_t end,
1201
1201
bint na_filter,
1202
1202
bint user_dtype,
1203
1203
kh_str_t * na_hashset,
@@ -1277,7 +1277,7 @@ cdef class TextReader:
1277
1277
raise TypeError (" the dtype %s is not "
1278
1278
" supported for parsing" % dtype)
1279
1279
1280
- cdef _string_convert(self , Py_ssize_t i, size_t start, size_t end,
1280
+ cdef _string_convert(self , Py_ssize_t i, int64_t start, int64_t end,
1281
1281
bint na_filter, kh_str_t * na_hashset):
1282
1282
1283
1283
cdef StringPath path = _string_path(self .c_encoding)
@@ -1338,7 +1338,7 @@ cdef class TextReader:
1338
1338
kh_destroy_str(table)
1339
1339
1340
1340
cdef _get_column_name(self , Py_ssize_t i, Py_ssize_t nused):
1341
- cdef int j
1341
+ cdef int64_t j
1342
1342
if self .has_usecols and self .names is not None :
1343
1343
if (not callable (self .usecols) and
1344
1344
len (self .names) == len (self .usecols)):
@@ -1430,8 +1430,8 @@ cdef inline StringPath _string_path(char *encoding):
1430
1430
# ----------------------------------------------------------------------
1431
1431
# Type conversions / inference support code
1432
1432
1433
- cdef _string_box_factorize(parser_t * parser, size_t col,
1434
- size_t line_start, size_t line_end,
1433
+ cdef _string_box_factorize(parser_t * parser, int64_t col,
1434
+ int64_t line_start, int64_t line_end,
1435
1435
bint na_filter, kh_str_t * na_hashset):
1436
1436
cdef:
1437
1437
int error, na_count = 0
@@ -1483,8 +1483,8 @@ cdef _string_box_factorize(parser_t *parser, size_t col,
1483
1483
1484
1484
return result, na_count
1485
1485
1486
- cdef _string_box_utf8(parser_t * parser, size_t col,
1487
- size_t line_start, size_t line_end,
1486
+ cdef _string_box_utf8(parser_t * parser, int64_t col,
1487
+ int64_t line_start, int64_t line_end,
1488
1488
bint na_filter, kh_str_t * na_hashset):
1489
1489
cdef:
1490
1490
int error, na_count = 0
@@ -1536,8 +1536,8 @@ cdef _string_box_utf8(parser_t *parser, size_t col,
1536
1536
1537
1537
return result, na_count
1538
1538
1539
- cdef _string_box_decode(parser_t * parser, size_t col,
1540
- size_t line_start, size_t line_end,
1539
+ cdef _string_box_decode(parser_t * parser, int64_t col,
1540
+ int64_t line_start, int64_t line_end,
1541
1541
bint na_filter, kh_str_t * na_hashset,
1542
1542
char * encoding):
1543
1543
cdef:
@@ -1595,8 +1595,8 @@ cdef _string_box_decode(parser_t *parser, size_t col,
1595
1595
1596
1596
1597
1597
@ cython.boundscheck (False )
1598
- cdef _categorical_convert(parser_t * parser, size_t col,
1599
- size_t line_start, size_t line_end,
1598
+ cdef _categorical_convert(parser_t * parser, int64_t col,
1599
+ int64_t line_start, int64_t line_end,
1600
1600
bint na_filter, kh_str_t * na_hashset,
1601
1601
char * encoding):
1602
1602
" Convert column data into codes, categories"
@@ -1666,8 +1666,8 @@ cdef _categorical_convert(parser_t *parser, size_t col,
1666
1666
kh_destroy_str(table)
1667
1667
return np.asarray(codes), result, na_count
1668
1668
1669
- cdef _to_fw_string(parser_t * parser, size_t col, size_t line_start,
1670
- size_t line_end, size_t width):
1669
+ cdef _to_fw_string(parser_t * parser, int64_t col, int64_t line_start,
1670
+ int64_t line_end, int64_t width):
1671
1671
cdef:
1672
1672
Py_ssize_t i
1673
1673
coliter_t it
@@ -1683,11 +1683,11 @@ cdef _to_fw_string(parser_t *parser, size_t col, size_t line_start,
1683
1683
1684
1684
return result
1685
1685
1686
- cdef inline void _to_fw_string_nogil(parser_t * parser, size_t col,
1687
- size_t line_start, size_t line_end,
1686
+ cdef inline void _to_fw_string_nogil(parser_t * parser, int64_t col,
1687
+ int64_t line_start, int64_t line_end,
1688
1688
size_t width, char * data) nogil:
1689
1689
cdef:
1690
- size_t i
1690
+ int64_t i
1691
1691
coliter_t it
1692
1692
const char * word = NULL
1693
1693
@@ -1702,7 +1702,7 @@ cdef char* cinf = b'inf'
1702
1702
cdef char * cposinf = b' +inf'
1703
1703
cdef char * cneginf = b' -inf'
1704
1704
1705
- cdef _try_double(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1705
+ cdef _try_double(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
1706
1706
bint na_filter, kh_str_t * na_hashset, object na_flist):
1707
1707
cdef:
1708
1708
int error, na_count = 0
@@ -1811,7 +1811,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
1811
1811
1812
1812
return 0
1813
1813
1814
- cdef _try_uint64(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1814
+ cdef _try_uint64(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
1815
1815
bint na_filter, kh_str_t * na_hashset):
1816
1816
cdef:
1817
1817
int error
@@ -1845,8 +1845,8 @@ cdef _try_uint64(parser_t *parser, size_t col, size_t line_start, size_t line_en
1845
1845
1846
1846
return result
1847
1847
1848
- cdef inline int _try_uint64_nogil(parser_t * parser, size_t col, size_t line_start,
1849
- size_t line_end, bint na_filter,
1848
+ cdef inline int _try_uint64_nogil(parser_t * parser, int64_t col, int64_t line_start,
1849
+ int64_t line_end, bint na_filter,
1850
1850
const kh_str_t * na_hashset,
1851
1851
uint64_t * data, uint_state * state) nogil:
1852
1852
cdef:
@@ -1882,7 +1882,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, size_t col, size_t line_star
1882
1882
1883
1883
return 0
1884
1884
1885
- cdef _try_int64(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1885
+ cdef _try_int64(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
1886
1886
bint na_filter, kh_str_t * na_hashset):
1887
1887
cdef:
1888
1888
int error, na_count = 0
@@ -1909,8 +1909,8 @@ cdef _try_int64(parser_t *parser, size_t col, size_t line_start, size_t line_end
1909
1909
1910
1910
return result, na_count
1911
1911
1912
- cdef inline int _try_int64_nogil(parser_t * parser, size_t col, size_t line_start,
1913
- size_t line_end, bint na_filter,
1912
+ cdef inline int _try_int64_nogil(parser_t * parser, int64_t col, int64_t line_start,
1913
+ int64_t line_end, bint na_filter,
1914
1914
const kh_str_t * na_hashset, int64_t NA,
1915
1915
int64_t * data, int * na_count) nogil:
1916
1916
cdef:
@@ -1947,7 +1947,7 @@ cdef inline int _try_int64_nogil(parser_t *parser, size_t col, size_t line_start
1947
1947
1948
1948
return 0
1949
1949
1950
- cdef _try_bool(parser_t * parser, size_t col, size_t line_start, size_t line_end,
1950
+ cdef _try_bool(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
1951
1951
bint na_filter, kh_str_t * na_hashset):
1952
1952
cdef:
1953
1953
int na_count
@@ -1969,8 +1969,8 @@ cdef _try_bool(parser_t *parser, size_t col, size_t line_start, size_t line_end,
1969
1969
return None , None
1970
1970
return result.view(np.bool_), na_count
1971
1971
1972
- cdef inline int _try_bool_nogil(parser_t * parser, size_t col, size_t line_start,
1973
- size_t line_end, bint na_filter,
1972
+ cdef inline int _try_bool_nogil(parser_t * parser, int64_t col, int64_t line_start,
1973
+ int64_t line_end, bint na_filter,
1974
1974
const kh_str_t * na_hashset, uint8_t NA,
1975
1975
uint8_t * data, int * na_count) nogil:
1976
1976
cdef:
@@ -2009,7 +2009,7 @@ cdef inline int _try_bool_nogil(parser_t *parser, size_t col, size_t line_start,
2009
2009
data += 1
2010
2010
return 0
2011
2011
2012
- cdef _try_bool_flex(parser_t * parser, size_t col, size_t line_start, size_t line_end,
2012
+ cdef _try_bool_flex(parser_t * parser, int64_t col, int64_t line_start, int64_t line_end,
2013
2013
bint na_filter, const kh_str_t * na_hashset,
2014
2014
const kh_str_t * true_hashset,
2015
2015
const kh_str_t * false_hashset):
@@ -2035,8 +2035,8 @@ cdef _try_bool_flex(parser_t *parser, size_t col, size_t line_start, size_t line
2035
2035
return None , None
2036
2036
return result.view(np.bool_), na_count
2037
2037
2038
- cdef inline int _try_bool_flex_nogil(parser_t * parser, size_t col, size_t line_start,
2039
- size_t line_end, bint na_filter,
2038
+ cdef inline int _try_bool_flex_nogil(parser_t * parser, int64_t col, int64_t line_start,
2039
+ int64_t line_end, bint na_filter,
2040
2040
const kh_str_t * na_hashset,
2041
2041
const kh_str_t * true_hashset,
2042
2042
const kh_str_t * false_hashset,
@@ -2254,8 +2254,8 @@ for k in list(na_values):
2254
2254
na_values[np.dtype(k)] = na_values[k]
2255
2255
2256
2256
2257
- cdef _apply_converter(object f, parser_t * parser, size_t col,
2258
- size_t line_start, size_t line_end,
2257
+ cdef _apply_converter(object f, parser_t * parser, int64_t col,
2258
+ int64_t line_start, int64_t line_end,
2259
2259
char * c_encoding):
2260
2260
cdef:
2261
2261
int error
@@ -2299,7 +2299,7 @@ def _to_structured_array(dict columns, object names, object usecols):
2299
2299
2300
2300
object name, fnames, field_type
2301
2301
Py_ssize_t i, offset, nfields, length
2302
- size_t stride, elsize
2302
+ int64_t stride, elsize
2303
2303
char * buf
2304
2304
2305
2305
if names is None :
@@ -2347,10 +2347,10 @@ def _to_structured_array(dict columns, object names, object usecols):
2347
2347
2348
2348
return recs
2349
2349
2350
- cdef _fill_structured_column(char * dst, char * src, size_t elsize,
2351
- size_t stride, size_t length, bint incref):
2350
+ cdef _fill_structured_column(char * dst, char * src, int64_t elsize,
2351
+ int64_t stride, int64_t length, bint incref):
2352
2352
cdef:
2353
- size_t i
2353
+ int64_t i
2354
2354
2355
2355
if incref:
2356
2356
util.transfer_object_column(dst, src, stride, length)
0 commit comments