Skip to content

Commit 395f712

Browse files
jeffknupp authored and gfyoung committed
BUG: Fix parser field type compatibility on 32-bit systems. (#17071)
Closes gh-17063
1 parent 9e6bb42 commit 395f712

File tree

1 file changed

+15
-13
lines changed

1 file changed

+15
-13
lines changed

pandas/_libs/src/parser/tokenizer.c

+15-13
Original file line numberDiff line numberDiff line change
@@ -196,14 +196,14 @@ int parser_init(parser_t *self) {
196196
sz = STREAM_INIT_SIZE / 10;
197197
sz = sz ? sz : 1;
198198
self->words = (char **)malloc(sz * sizeof(char *));
199-
self->word_starts = (size_t *)malloc(sz * sizeof(size_t));
199+
self->word_starts = (int64_t *)malloc(sz * sizeof(int64_t));
200200
self->words_cap = sz;
201201
self->words_len = 0;
202202

203203
// line pointers and metadata
204-
self->line_start = (size_t *)malloc(sz * sizeof(size_t));
204+
self->line_start = (int64_t *)malloc(sz * sizeof(int64_t));
205205

206-
self->line_fields = (size_t *)malloc(sz * sizeof(size_t));
206+
self->line_fields = (int64_t *)malloc(sz * sizeof(int64_t));
207207

208208
self->lines_cap = sz;
209209
self->lines = 0;
@@ -247,7 +247,7 @@ void parser_del(parser_t *self) {
247247
}
248248

249249
static int make_stream_space(parser_t *self, size_t nbytes) {
250-
size_t i, cap;
250+
int64_t i, cap;
251251
int status;
252252
void *orig_ptr, *newptr;
253253

@@ -419,7 +419,7 @@ static void append_warning(parser_t *self, const char *msg) {
419419

420420
static int end_line(parser_t *self) {
421421
char *msg;
422-
int fields;
422+
int64_t fields;
423423
int ex_fields = self->expected_fields;
424424
size_t bufsize = 100; // for error or warning messages
425425

@@ -468,8 +468,8 @@ static int end_line(parser_t *self) {
468468
if (self->error_bad_lines) {
469469
self->error_msg = (char *)malloc(bufsize);
470470
snprintf(self->error_msg, bufsize,
471-
"Expected %d fields in line %d, saw %d\n",
472-
ex_fields, self->file_lines, fields);
471+
"Expected %d fields in line %lld, saw %lld\n",
472+
ex_fields, (long long)self->file_lines, (long long)fields);
473473

474474
TRACE(("Error at line %d, %d fields\n", self->file_lines, fields));
475475

@@ -480,8 +480,9 @@ static int end_line(parser_t *self) {
480480
// pass up error message
481481
msg = (char *)malloc(bufsize);
482482
snprintf(msg, bufsize,
483-
"Skipping line %d: expected %d fields, saw %d\n",
484-
self->file_lines, ex_fields, fields);
483+
"Skipping line %lld: expected %d fields, saw %lld\n",
484+
(long long)self->file_lines, ex_fields,
485+
(long long)fields);
485486
append_warning(self, msg);
486487
free(msg);
487488
}
@@ -632,7 +633,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
632633
stream = self->stream + self->stream_len; \
633634
slen = self->stream_len; \
634635
self->state = STATE; \
635-
if (line_limit > 0 && self->lines == start_lines + (size_t)line_limit) { \
636+
if (line_limit > 0 && self->lines == start_lines + (int64_t)line_limit) { \
636637
goto linelimit; \
637638
}
638639

@@ -647,7 +648,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
647648
stream = self->stream + self->stream_len; \
648649
slen = self->stream_len; \
649650
self->state = STATE; \
650-
if (line_limit > 0 && self->lines == start_lines + (size_t)line_limit) { \
651+
if (line_limit > 0 && self->lines == start_lines + (int64_t)line_limit) { \
651652
goto linelimit; \
652653
}
653654

@@ -1147,7 +1148,8 @@ static int parser_handle_eof(parser_t *self) {
11471148
case IN_QUOTED_FIELD:
11481149
self->error_msg = (char *)malloc(bufsize);
11491150
snprintf(self->error_msg, bufsize,
1150-
"EOF inside string starting at line %d", self->file_lines);
1151+
"EOF inside string starting at line %lld",
1152+
(long long)self->file_lines);
11511153
return -1;
11521154

11531155
case ESCAPED_CHAR:
@@ -1318,7 +1320,7 @@ void debug_print_parser(parser_t *self) {
13181320
char *token;
13191321

13201322
for (line = 0; line < self->lines; ++line) {
1321-
printf("(Parsed) Line %d: ", line);
1323+
printf("(Parsed) Line %lld: ", (long long)line);
13221324

13231325
for (j = 0; j < self->line_fields[j]; ++j) {
13241326
token = self->words[j + self->line_start[line]];

0 commit comments

Comments
 (0)