@@ -121,30 +121,30 @@ cdef extern from "parser/tokenizer.h":
121
121
io_callback cb_io
122
122
io_cleanup cb_cleanup
123
123
124
- int chunksize # Number of bytes to prepare for each chunk
125
- char * data # pointer to data to be processed
126
- int datalen # amount of data available
127
- int datapos
124
+ int64_t chunksize # Number of bytes to prepare for each chunk
125
+ char * data # pointer to data to be processed
126
+ int64_t datalen # amount of data available
127
+ int64_t datapos
128
128
129
129
# where to write out tokenized data
130
130
char * stream
131
- int stream_len
132
- int stream_cap
131
+ int64_t stream_len
132
+ int64_t stream_cap
133
133
134
134
# Store words in (potentially ragged) matrix for now, hmm
135
135
char ** words
136
- int * word_starts # where we are in the stream
137
- int words_len
138
- int words_cap
136
+ int64_t * word_starts # where we are in the stream
137
+ int64_t words_len
138
+ int64_t words_cap
139
139
140
- char * pword_start # pointer to stream start of current field
141
- int word_start # position start of current field
140
+ char * pword_start # pointer to stream start of current field
141
+ int64_t word_start # position start of current field
142
142
143
- int * line_start # position in words for start of line
144
- int * line_fields # Number of fields in each line
145
- int lines # Number of lines observed
146
- int file_lines # Number of file lines observed (with bad/skipped)
147
- int lines_cap # Vector capacity
143
+ int64_t * line_start # position in words for start of line
144
+ int64_t * line_fields # Number of fields in each line
145
+ int64_t lines # Number of lines observed
146
+ int64_t file_lines # Number of lines observed (with bad/skipped)
147
+ int64_t lines_cap # Vector capacity
148
148
149
149
# Tokenizing stuff
150
150
ParserState state
@@ -177,14 +177,14 @@ cdef extern from "parser/tokenizer.h":
177
177
# thousands separator (comma, period)
178
178
char thousands
179
179
180
- int header # Boolean: 1: has header, 0: no header
181
- int header_start # header row start
182
- int header_end # header row end
180
+ int header # Boolean: 1: has header, 0: no header
181
+ int64_t header_start # header row start
182
+ int64_t header_end # header row end
183
183
184
184
void * skipset
185
185
PyObject * skipfunc
186
186
int64_t skip_first_N_rows
187
- int skipfooter
187
+ int64_t skipfooter
188
188
# pick one, depending on whether the converter requires GIL
189
189
double (* double_converter_nogil)(const char * , char ** ,
190
190
char , char , char , int ) nogil
@@ -195,12 +195,12 @@ cdef extern from "parser/tokenizer.h":
195
195
char * warn_msg
196
196
char * error_msg
197
197
198
- int skip_empty_lines
198
+ int64_t skip_empty_lines
199
199
200
200
ctypedef struct coliter_t:
201
201
char ** words
202
- int * line_start
203
- int col
202
+ int64_t * line_start
203
+ int64_t col
204
204
205
205
ctypedef struct uint_state:
206
206
int seen_sint
@@ -210,7 +210,8 @@ cdef extern from "parser/tokenizer.h":
210
210
void uint_state_init(uint_state * self )
211
211
int uint64_conflict(uint_state * self )
212
212
213
- void coliter_setup(coliter_t * it, parser_t * parser, int i, int start) nogil
213
+ void coliter_setup(coliter_t * it, parser_t * parser,
214
+ int64_t i, int64_t start) nogil
214
215
void COLITER_NEXT(coliter_t, const char * ) nogil
215
216
216
217
parser_t* parser_new()
@@ -289,14 +290,14 @@ cdef class TextReader:
289
290
object true_values, false_values
290
291
object handle
291
292
bint na_filter, verbose, has_usecols, has_mi_columns
292
- int parser_start
293
+ int64_t parser_start
293
294
list clocks
294
295
char * c_encoding
295
296
kh_str_t * false_set
296
297
kh_str_t * true_set
297
298
298
299
cdef public:
299
- int leading_cols, table_width, skipfooter, buffer_lines
300
+ int64_t leading_cols, table_width, skipfooter, buffer_lines
300
301
object allow_leading_cols
301
302
object delimiter, converters, delim_whitespace
302
303
object na_values
@@ -730,7 +731,8 @@ cdef class TextReader:
730
731
Py_ssize_t i, start, field_count, passed_count, unnamed_count # noqa
731
732
char * word
732
733
object name
733
- int status, hr, data_line
734
+ int status
735
+ int64_t hr, data_line
734
736
char * errors = " strict"
735
737
cdef StringPath path = _string_path(self .c_encoding)
736
738
@@ -949,8 +951,8 @@ cdef class TextReader:
949
951
950
952
cdef _read_rows(self , rows, bint trim):
951
953
cdef:
952
- int buffered_lines
953
- int irows, footer = 0
954
+ int64_t buffered_lines
955
+ int64_t irows, footer = 0
954
956
955
957
self ._start_clock()
956
958
@@ -1018,12 +1020,13 @@ cdef class TextReader:
1018
1020
1019
1021
def _convert_column_data (self , rows = None , upcast_na = False , footer = 0 ):
1020
1022
cdef:
1021
- Py_ssize_t i, nused
1023
+ int64_t i
1024
+ int nused
1022
1025
kh_str_t * na_hashset = NULL
1023
- int start, end
1026
+ int64_t start, end
1024
1027
object name, na_flist, col_dtype = None
1025
1028
bint na_filter = 0
1026
- Py_ssize_t num_cols
1029
+ int64_t num_cols
1027
1030
1028
1031
start = self .parser_start
1029
1032
@@ -1195,7 +1198,7 @@ cdef class TextReader:
1195
1198
return col_res, na_count
1196
1199
1197
1200
cdef _convert_with_dtype(self , object dtype, Py_ssize_t i,
1198
- int start, int end,
1201
+ int64_t start, int64_t end,
1199
1202
bint na_filter,
1200
1203
bint user_dtype,
1201
1204
kh_str_t * na_hashset,
@@ -1275,7 +1278,7 @@ cdef class TextReader:
1275
1278
raise TypeError (" the dtype %s is not "
1276
1279
" supported for parsing" % dtype)
1277
1280
1278
- cdef _string_convert(self , Py_ssize_t i, int start, int end,
1281
+ cdef _string_convert(self , Py_ssize_t i, int64_t start, int64_t end,
1279
1282
bint na_filter, kh_str_t * na_hashset):
1280
1283
1281
1284
cdef StringPath path = _string_path(self .c_encoding)
@@ -1336,6 +1339,7 @@ cdef class TextReader:
1336
1339
kh_destroy_str(table)
1337
1340
1338
1341
cdef _get_column_name(self , Py_ssize_t i, Py_ssize_t nused):
1342
+ cdef int64_t j
1339
1343
if self .has_usecols and self .names is not None :
1340
1344
if (not callable (self .usecols) and
1341
1345
len (self .names) == len (self .usecols)):
@@ -1427,8 +1431,8 @@ cdef inline StringPath _string_path(char *encoding):
1427
1431
# ----------------------------------------------------------------------
1428
1432
# Type conversions / inference support code
1429
1433
1430
- cdef _string_box_factorize(parser_t * parser, int col,
1431
- int line_start, int line_end,
1434
+ cdef _string_box_factorize(parser_t * parser, int64_t col,
1435
+ int64_t line_start, int64_t line_end,
1432
1436
bint na_filter, kh_str_t * na_hashset):
1433
1437
cdef:
1434
1438
int error, na_count = 0
@@ -1480,8 +1484,8 @@ cdef _string_box_factorize(parser_t *parser, int col,
1480
1484
1481
1485
return result, na_count
1482
1486
1483
- cdef _string_box_utf8(parser_t * parser, int col,
1484
- int line_start, int line_end,
1487
+ cdef _string_box_utf8(parser_t * parser, int64_t col,
1488
+ int64_t line_start, int64_t line_end,
1485
1489
bint na_filter, kh_str_t * na_hashset):
1486
1490
cdef:
1487
1491
int error, na_count = 0
@@ -1533,8 +1537,8 @@ cdef _string_box_utf8(parser_t *parser, int col,
1533
1537
1534
1538
return result, na_count
1535
1539
1536
- cdef _string_box_decode(parser_t * parser, int col,
1537
- int line_start, int line_end,
1540
+ cdef _string_box_decode(parser_t * parser, int64_t col,
1541
+ int64_t line_start, int64_t line_end,
1538
1542
bint na_filter, kh_str_t * na_hashset,
1539
1543
char * encoding):
1540
1544
cdef:
@@ -1592,8 +1596,8 @@ cdef _string_box_decode(parser_t *parser, int col,
1592
1596
1593
1597
1594
1598
@ cython.boundscheck (False )
1595
- cdef _categorical_convert(parser_t * parser, int col,
1596
- int line_start, int line_end,
1599
+ cdef _categorical_convert(parser_t * parser, int64_t col,
1600
+ int64_t line_start, int64_t line_end,
1597
1601
bint na_filter, kh_str_t * na_hashset,
1598
1602
char * encoding):
1599
1603
" Convert column data into codes, categories"
@@ -1663,8 +1667,8 @@ cdef _categorical_convert(parser_t *parser, int col,
1663
1667
kh_destroy_str(table)
1664
1668
return np.asarray(codes), result, na_count
1665
1669
1666
- cdef _to_fw_string(parser_t * parser, int col, int line_start,
1667
- int line_end, size_t width):
1670
+ cdef _to_fw_string(parser_t * parser, int64_t col, int64_t line_start,
1671
+ int64_t line_end, int64_t width):
1668
1672
cdef:
1669
1673
Py_ssize_t i
1670
1674
coliter_t it
@@ -1680,11 +1684,11 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start,
1680
1684
1681
1685
return result
1682
1686
1683
- cdef inline void _to_fw_string_nogil(parser_t * parser, int col,
1684
- int line_start, int line_end,
1687
+ cdef inline void _to_fw_string_nogil(parser_t * parser, int64_t col,
1688
+ int64_t line_start, int64_t line_end,
1685
1689
size_t width, char * data) nogil:
1686
1690
cdef:
1687
- Py_ssize_t i
1691
+ int64_t i
1688
1692
coliter_t it
1689
1693
const char * word = NULL
1690
1694
@@ -1699,7 +1703,8 @@ cdef char* cinf = b'inf'
1699
1703
cdef char * cposinf = b' +inf'
1700
1704
cdef char * cneginf = b' -inf'
1701
1705
1702
- cdef _try_double(parser_t * parser, int col, int line_start, int line_end,
1706
+ cdef _try_double(parser_t * parser, int64_t col,
1707
+ int64_t line_start, int64_t line_end,
1703
1708
bint na_filter, kh_str_t * na_hashset, object na_flist):
1704
1709
cdef:
1705
1710
int error, na_count = 0
@@ -1808,7 +1813,8 @@ cdef inline int _try_double_nogil(parser_t *parser,
1808
1813
1809
1814
return 0
1810
1815
1811
- cdef _try_uint64(parser_t * parser, int col, int line_start, int line_end,
1816
+ cdef _try_uint64(parser_t * parser, int64_t col,
1817
+ int64_t line_start, int64_t line_end,
1812
1818
bint na_filter, kh_str_t * na_hashset):
1813
1819
cdef:
1814
1820
int error
@@ -1842,8 +1848,9 @@ cdef _try_uint64(parser_t *parser, int col, int line_start, int line_end,
1842
1848
1843
1849
return result
1844
1850
1845
- cdef inline int _try_uint64_nogil(parser_t * parser, int col, int line_start,
1846
- int line_end, bint na_filter,
1851
+ cdef inline int _try_uint64_nogil(parser_t * parser, int64_t col,
1852
+ int64_t line_start,
1853
+ int64_t line_end, bint na_filter,
1847
1854
const kh_str_t * na_hashset,
1848
1855
uint64_t * data, uint_state * state) nogil:
1849
1856
cdef:
@@ -1879,7 +1886,8 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int col, int line_start,
1879
1886
1880
1887
return 0
1881
1888
1882
- cdef _try_int64(parser_t * parser, int col, int line_start, int line_end,
1889
+ cdef _try_int64(parser_t * parser, int64_t col,
1890
+ int64_t line_start, int64_t line_end,
1883
1891
bint na_filter, kh_str_t * na_hashset):
1884
1892
cdef:
1885
1893
int error, na_count = 0
@@ -1906,8 +1914,9 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
1906
1914
1907
1915
return result, na_count
1908
1916
1909
- cdef inline int _try_int64_nogil(parser_t * parser, int col, int line_start,
1910
- int line_end, bint na_filter,
1917
+ cdef inline int _try_int64_nogil(parser_t * parser, int64_t col,
1918
+ int64_t line_start,
1919
+ int64_t line_end, bint na_filter,
1911
1920
const kh_str_t * na_hashset, int64_t NA,
1912
1921
int64_t * data, int * na_count) nogil:
1913
1922
cdef:
@@ -1944,7 +1953,8 @@ cdef inline int _try_int64_nogil(parser_t *parser, int col, int line_start,
1944
1953
1945
1954
return 0
1946
1955
1947
- cdef _try_bool(parser_t * parser, int col, int line_start, int line_end,
1956
+ cdef _try_bool(parser_t * parser, int64_t col,
1957
+ int64_t line_start, int64_t line_end,
1948
1958
bint na_filter, kh_str_t * na_hashset):
1949
1959
cdef:
1950
1960
int na_count
@@ -1966,8 +1976,9 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end,
1966
1976
return None , None
1967
1977
return result.view(np.bool_), na_count
1968
1978
1969
- cdef inline int _try_bool_nogil(parser_t * parser, int col, int line_start,
1970
- int line_end, bint na_filter,
1979
+ cdef inline int _try_bool_nogil(parser_t * parser, int64_t col,
1980
+ int64_t line_start,
1981
+ int64_t line_end, bint na_filter,
1971
1982
const kh_str_t * na_hashset, uint8_t NA,
1972
1983
uint8_t * data, int * na_count) nogil:
1973
1984
cdef:
@@ -2006,7 +2017,8 @@ cdef inline int _try_bool_nogil(parser_t *parser, int col, int line_start,
2006
2017
data += 1
2007
2018
return 0
2008
2019
2009
- cdef _try_bool_flex(parser_t * parser, int col, int line_start, int line_end,
2020
+ cdef _try_bool_flex(parser_t * parser, int64_t col,
2021
+ int64_t line_start, int64_t line_end,
2010
2022
bint na_filter, const kh_str_t * na_hashset,
2011
2023
const kh_str_t * true_hashset,
2012
2024
const kh_str_t * false_hashset):
@@ -2032,8 +2044,9 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
2032
2044
return None , None
2033
2045
return result.view(np.bool_), na_count
2034
2046
2035
- cdef inline int _try_bool_flex_nogil(parser_t * parser, int col, int line_start,
2036
- int line_end, bint na_filter,
2047
+ cdef inline int _try_bool_flex_nogil(parser_t * parser, int64_t col,
2048
+ int64_t line_start,
2049
+ int64_t line_end, bint na_filter,
2037
2050
const kh_str_t * na_hashset,
2038
2051
const kh_str_t * true_hashset,
2039
2052
const kh_str_t * false_hashset,
@@ -2251,8 +2264,8 @@ for k in list(na_values):
2251
2264
na_values[np.dtype(k)] = na_values[k]
2252
2265
2253
2266
2254
- cdef _apply_converter(object f, parser_t * parser, int col,
2255
- int line_start, int line_end,
2267
+ cdef _apply_converter(object f, parser_t * parser, int64_t col,
2268
+ int64_t line_start, int64_t line_end,
2256
2269
char * c_encoding):
2257
2270
cdef:
2258
2271
int error
@@ -2296,7 +2309,7 @@ def _to_structured_array(dict columns, object names, object usecols):
2296
2309
2297
2310
object name, fnames, field_type
2298
2311
Py_ssize_t i, offset, nfields, length
2299
- int stride, elsize
2312
+ int64_t stride, elsize
2300
2313
char * buf
2301
2314
2302
2315
if names is None :
@@ -2344,10 +2357,10 @@ def _to_structured_array(dict columns, object names, object usecols):
2344
2357
2345
2358
return recs
2346
2359
2347
- cdef _fill_structured_column(char * dst, char * src, int elsize,
2348
- int stride, int length, bint incref):
2360
+ cdef _fill_structured_column(char * dst, char * src, int64_t elsize,
2361
+ int64_t stride, int64_t length, bint incref):
2349
2362
cdef:
2350
- Py_ssize_t i
2363
+ int64_t i
2351
2364
2352
2365
if incref:
2353
2366
util.transfer_object_column(dst, src, stride, length)
0 commit comments