Skip to content

Commit 0c27f40

Browse files
ginggs authored and jreback committed
BUG: Switch more size_t references to int64_t (#20785) (#20786)
1 parent 5d0e815 commit 0c27f40

File tree

2 files changed: +18 −17 lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -1147,6 +1147,7 @@ I/O
11471147
- Bug in :func:`read_msgpack` when a non-existent file is passed in Python 2 (:issue:`15296`)
11481148
- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`)
11491149
- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`)
1150+
- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`)
11501151
- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`)
11511152
- Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`)
11521153
- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`)

pandas/_libs/src/parser/tokenizer.c

+17-17
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,9 @@ static void free_if_not_null(void **ptr) {
6969
7070
*/
7171

72-
static void *grow_buffer(void *buffer, size_t length, size_t *capacity,
73-
size_t space, size_t elsize, int *error) {
74-
size_t cap = *capacity;
72+
static void *grow_buffer(void *buffer, int64_t length, int64_t *capacity,
73+
int64_t space, int64_t elsize, int *error) {
74+
int64_t cap = *capacity;
7575
void *newbuffer = buffer;
7676

7777
// Can we fit potentially nbytes tokens (+ null terminators) in the stream?
@@ -169,7 +169,7 @@ int parser_cleanup(parser_t *self) {
169169
}
170170

171171
int parser_init(parser_t *self) {
172-
size_t sz;
172+
int64_t sz;
173173

174174
/*
175175
Initialize data buffers
@@ -353,7 +353,7 @@ static int push_char(parser_t *self, char c) {
353353
("push_char: ERROR!!! self->stream_len(%d) >= "
354354
"self->stream_cap(%d)\n",
355355
self->stream_len, self->stream_cap))
356-
size_t bufsize = 100;
356+
int64_t bufsize = 100;
357357
self->error_msg = (char *)malloc(bufsize);
358358
snprintf(self->error_msg, bufsize,
359359
"Buffer overflow caught - possible malformed input file.\n");
@@ -370,7 +370,7 @@ int P_INLINE end_field(parser_t *self) {
370370
("end_field: ERROR!!! self->words_len(%zu) >= "
371371
"self->words_cap(%zu)\n",
372372
self->words_len, self->words_cap))
373-
size_t bufsize = 100;
373+
int64_t bufsize = 100;
374374
self->error_msg = (char *)malloc(bufsize);
375375
snprintf(self->error_msg, bufsize,
376376
"Buffer overflow caught - possible malformed input file.\n");
@@ -402,8 +402,8 @@ int P_INLINE end_field(parser_t *self) {
402402
}
403403

404404
static void append_warning(parser_t *self, const char *msg) {
405-
size_t ex_length;
406-
size_t length = strlen(msg);
405+
int64_t ex_length;
406+
int64_t length = strlen(msg);
407407
void *newptr;
408408

409409
if (self->warn_msg == NULL) {
@@ -423,7 +423,7 @@ static int end_line(parser_t *self) {
423423
char *msg;
424424
int64_t fields;
425425
int ex_fields = self->expected_fields;
426-
size_t bufsize = 100; // for error or warning messages
426+
int64_t bufsize = 100; // for error or warning messages
427427

428428
fields = self->line_fields[self->lines];
429429

@@ -495,7 +495,7 @@ static int end_line(parser_t *self) {
495495
fields < ex_fields) {
496496
// might overrun the buffer when closing fields
497497
if (make_stream_space(self, ex_fields - fields) < 0) {
498-
size_t bufsize = 100;
498+
int64_t bufsize = 100;
499499
self->error_msg = (char *)malloc(bufsize);
500500
snprintf(self->error_msg, bufsize, "out of memory");
501501
return -1;
@@ -516,7 +516,7 @@ static int end_line(parser_t *self) {
516516
TRACE((
517517
"end_line: ERROR!!! self->lines(%zu) >= self->lines_cap(%zu)\n",
518518
self->lines, self->lines_cap))
519-
size_t bufsize = 100;
519+
int64_t bufsize = 100;
520520
self->error_msg = (char *)malloc(bufsize);
521521
snprintf(self->error_msg, bufsize,
522522
"Buffer overflow caught - "
@@ -577,7 +577,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
577577
self->datalen = bytes_read;
578578

579579
if (status != REACHED_EOF && self->data == NULL) {
580-
size_t bufsize = 200;
580+
int64_t bufsize = 200;
581581
self->error_msg = (char *)malloc(bufsize);
582582

583583
if (status == CALLING_READ_FAILED) {
@@ -608,7 +608,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
608608
if (slen >= self->stream_cap) { \
609609
TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= stream_cap(%d)\n", slen, \
610610
self->stream_cap)) \
611-
size_t bufsize = 100; \
611+
int64_t bufsize = 100; \
612612
self->error_msg = (char *)malloc(bufsize); \
613613
snprintf(self->error_msg, bufsize, \
614614
"Buffer overflow caught - possible malformed input file.\n");\
@@ -729,7 +729,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int64_t start_lines) {
729729
char *buf = self->data + self->datapos;
730730

731731
if (make_stream_space(self, self->datalen - self->datapos) < 0) {
732-
size_t bufsize = 100;
732+
int64_t bufsize = 100;
733733
self->error_msg = (char *)malloc(bufsize);
734734
snprintf(self->error_msg, bufsize, "out of memory");
735735
return -1;
@@ -1036,7 +1036,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int64_t start_lines) {
10361036
PUSH_CHAR(c);
10371037
self->state = IN_FIELD;
10381038
} else {
1039-
size_t bufsize = 100;
1039+
int64_t bufsize = 100;
10401040
self->error_msg = (char *)malloc(bufsize);
10411041
snprintf(self->error_msg, bufsize,
10421042
"delimiter expected after quote in quote");
@@ -1132,7 +1132,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int64_t start_lines) {
11321132
}
11331133

11341134
static int parser_handle_eof(parser_t *self) {
1135-
size_t bufsize = 100;
1135+
int64_t bufsize = 100;
11361136

11371137
TRACE(
11381138
("handling eof, datalen: %d, pstate: %d\n", self->datalen, self->state))
@@ -1177,7 +1177,7 @@ static int parser_handle_eof(parser_t *self) {
11771177
}
11781178

11791179
int parser_consume_rows(parser_t *self, size_t nrows) {
1180-
size_t i, offset, word_deletions, char_count;
1180+
int64_t i, offset, word_deletions, char_count;
11811181

11821182
if (nrows > self->lines) {
11831183
nrows = self->lines;

0 commit comments

Comments
 (0)