Skip to content

Remove unnecessary casts in tokenizer.c #37792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 13, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 38 additions & 43 deletions pandas/_libs/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ int parser_init(parser_t *self) {
self->warn_msg = NULL;

// token stream
self->stream = (char *)malloc(STREAM_INIT_SIZE * sizeof(char));
self->stream = malloc(STREAM_INIT_SIZE * sizeof(char));
if (self->stream == NULL) {
parser_cleanup(self);
return PARSER_OUT_OF_MEMORY;
Expand All @@ -170,16 +170,16 @@ int parser_init(parser_t *self) {
// word pointers and metadata
sz = STREAM_INIT_SIZE / 10;
sz = sz ? sz : 1;
self->words = (char **)malloc(sz * sizeof(char *));
self->word_starts = (int64_t *)malloc(sz * sizeof(int64_t));
self->words = malloc(sz * sizeof(char *));
self->word_starts = malloc(sz * sizeof(int64_t));
self->max_words_cap = sz;
self->words_cap = sz;
self->words_len = 0;

// line pointers and metadata
self->line_start = (int64_t *)malloc(sz * sizeof(int64_t));
self->line_start = malloc(sz * sizeof(int64_t));

self->line_fields = (int64_t *)malloc(sz * sizeof(int64_t));
self->line_fields = malloc(sz * sizeof(int64_t));

self->lines_cap = sz;
self->lines = 0;
Expand Down Expand Up @@ -345,7 +345,7 @@ static int push_char(parser_t *self, char c) {
"self->stream_cap(%d)\n",
self->stream_len, self->stream_cap))
int64_t bufsize = 100;
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);
snprintf(self->error_msg, bufsize,
"Buffer overflow caught - possible malformed input file.\n");
return PARSER_OUT_OF_MEMORY;
Expand All @@ -362,7 +362,7 @@ int PANDAS_INLINE end_field(parser_t *self) {
"self->words_cap(%zu)\n",
self->words_len, self->words_cap))
int64_t bufsize = 100;
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);
snprintf(self->error_msg, bufsize,
"Buffer overflow caught - possible malformed input file.\n");
return PARSER_OUT_OF_MEMORY;
Expand Down Expand Up @@ -398,7 +398,7 @@ static void append_warning(parser_t *self, const char *msg) {
void *newptr;

if (self->warn_msg == NULL) {
self->warn_msg = (char *)malloc(length + 1);
self->warn_msg = malloc(length + 1);
snprintf(self->warn_msg, length + 1, "%s", msg);
} else {
ex_length = strlen(self->warn_msg);
Expand Down Expand Up @@ -459,10 +459,10 @@ static int end_line(parser_t *self) {

// file_lines is now the actual file line number (starting at 1)
if (self->error_bad_lines) {
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);
snprintf(self->error_msg, bufsize,
"Expected %d fields in line %lld, saw %lld\n",
ex_fields, (long long)self->file_lines, (long long)fields);
"Expected %d fields in line %" PRIu64 ", saw %" PRId64 "\n",
ex_fields, self->file_lines, fields);

TRACE(("Error at line %d, %d fields\n", self->file_lines, fields));

Expand All @@ -471,11 +471,10 @@ static int end_line(parser_t *self) {
// simply skip bad lines
if (self->warn_bad_lines) {
// pass up error message
msg = (char *)malloc(bufsize);
msg = malloc(bufsize);
snprintf(msg, bufsize,
"Skipping line %lld: expected %d fields, saw %lld\n",
(long long)self->file_lines, ex_fields,
(long long)fields);
"Skipping line %" PRIu64 ": expected %d fields, saw %"
PRId64 "\n", self->file_lines, ex_fields, fields);
append_warning(self, msg);
free(msg);
}
Expand All @@ -487,7 +486,7 @@ static int end_line(parser_t *self) {
// might overrun the buffer when closing fields
if (make_stream_space(self, ex_fields - fields) < 0) {
int64_t bufsize = 100;
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);
snprintf(self->error_msg, bufsize, "out of memory");
return -1;
}
Expand All @@ -508,7 +507,7 @@ static int end_line(parser_t *self) {
"end_line: ERROR!!! self->lines(%zu) >= self->lines_cap(%zu)\n",
self->lines, self->lines_cap))
int64_t bufsize = 100;
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);
snprintf(self->error_msg, bufsize,
"Buffer overflow caught - "
"possible malformed input file.\n");
Expand Down Expand Up @@ -569,7 +568,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {

if (status != REACHED_EOF && self->data == NULL) {
int64_t bufsize = 200;
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);

if (status == CALLING_READ_FAILED) {
snprintf(self->error_msg, bufsize,
Expand Down Expand Up @@ -600,7 +599,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= stream_cap(%d)\n", slen, \
self->stream_cap)) \
int64_t bufsize = 100; \
self->error_msg = (char *)malloc(bufsize); \
self->error_msg = malloc(bufsize); \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extra whitespace

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is by design, to keep all of the line-continuation backslashes right-aligned.

snprintf(self->error_msg, bufsize, \
"Buffer overflow caught - possible malformed input file.\n");\
return PARSER_OUT_OF_MEMORY; \
Expand Down Expand Up @@ -730,7 +729,7 @@ int tokenize_bytes(parser_t *self,

if (make_stream_space(self, self->datalen - self->datapos) < 0) {
int64_t bufsize = 100;
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);
snprintf(self->error_msg, bufsize, "out of memory");
return -1;
}
Expand Down Expand Up @@ -1037,7 +1036,7 @@ int tokenize_bytes(parser_t *self,
self->state = IN_FIELD;
} else {
int64_t bufsize = 100;
self->error_msg = (char *)malloc(bufsize);
self->error_msg = malloc(bufsize);
snprintf(self->error_msg, bufsize,
"delimiter expected after quote in quote");
goto parsingerror;
Expand Down Expand Up @@ -1150,8 +1149,8 @@ static int parser_handle_eof(parser_t *self) {
case IN_QUOTED_FIELD:
self->error_msg = (char *)malloc(bufsize);
snprintf(self->error_msg, bufsize,
"EOF inside string starting at row %lld",
(long long)self->file_lines);
"EOF inside string starting at row %" PRIu64,
self->file_lines);
return -1;

case ESCAPED_CHAR:
Expand Down Expand Up @@ -1203,7 +1202,7 @@ int parser_consume_rows(parser_t *self, size_t nrows) {

/* move stream, only if something to move */
if (char_count < self->stream_len) {
memmove((void *)self->stream, (void *)(self->stream + char_count),
memmove(self->stream, (self->stream + char_count),
self->stream_len - char_count);
}
/* buffer counts */
Expand Down Expand Up @@ -1269,20 +1268,16 @@ int parser_trim_buffers(parser_t *self) {
new_cap = _next_pow2(self->words_len) + 1;
if (new_cap < self->words_cap) {
TRACE(("parser_trim_buffers: new_cap < self->words_cap\n"));
newptr = realloc((void *)self->words, new_cap * sizeof(char *));
if (newptr == NULL) {
self->words = realloc(self->words, new_cap * sizeof(char *));
if (self->words == NULL) {
return PARSER_OUT_OF_MEMORY;
} else {
self->words = (char **)newptr;
}
newptr = realloc((void *)self->word_starts,
new_cap * sizeof(int64_t));
if (newptr == NULL) {
self->word_starts = realloc(self->word_starts,
new_cap * sizeof(int64_t));
if (self->word_starts == NULL) {
return PARSER_OUT_OF_MEMORY;
} else {
self->word_starts = (int64_t *)newptr;
self->words_cap = new_cap;
}
self->words_cap = new_cap;
}

/* trim stream */
Expand All @@ -1295,7 +1290,7 @@ int parser_trim_buffers(parser_t *self) {
TRACE(
("parser_trim_buffers: new_cap < self->stream_cap, calling "
"realloc\n"));
newptr = realloc((void *)self->stream, new_cap);
newptr = realloc(self->stream, new_cap);
if (newptr == NULL) {
return PARSER_OUT_OF_MEMORY;
} else {
Expand All @@ -1321,19 +1316,19 @@ int parser_trim_buffers(parser_t *self) {
new_cap = _next_pow2(self->lines) + 1;
if (new_cap < self->lines_cap) {
TRACE(("parser_trim_buffers: new_cap < self->lines_cap\n"));
newptr = realloc((void *)self->line_start,
newptr = realloc(self->line_start,
new_cap * sizeof(int64_t));
if (newptr == NULL) {
return PARSER_OUT_OF_MEMORY;
} else {
self->line_start = (int64_t *)newptr;
self->line_start = newptr;
}
newptr = realloc((void *)self->line_fields,
newptr = realloc(self->line_fields,
new_cap * sizeof(int64_t));
if (newptr == NULL) {
return PARSER_OUT_OF_MEMORY;
} else {
self->line_fields = (int64_t *)newptr;
self->line_fields = newptr;
self->lines_cap = new_cap;
}
}
Expand Down Expand Up @@ -1828,14 +1823,14 @@ double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
if (endpc == pc + strlen(pc)) {
if (q != NULL) {
// report endptr from source string (p)
*q = (char *) endptr;
*q = endptr;
}
} else {
*error = -1;
if (q != NULL) {
// p and pc are different len due to tsep removal. Can't report
// how much it has consumed of p. Just rewind to beginning.
*q = (char *)p;
*q = (char *)p; // TODO: this could be undefined behavior
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably just need better types here. I don't think anything is ever actually written through the pointer stored in q, but if it is, this would be undefined behavior, as the contents of p are qualified as const.

}
}
if (maybe_int != NULL) *maybe_int = 0;
Expand Down Expand Up @@ -1863,7 +1858,7 @@ int uint64_conflict(uint_state *self) {

int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
int *error, char tsep) {
const char *p = (const char *)p_item;
const char *p = p_item;
int isneg = 0;
int64_t number = 0;
int d;
Expand Down Expand Up @@ -1983,7 +1978,7 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,

uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
uint64_t uint_max, int *error, char tsep) {
const char *p = (const char *)p_item;
const char *p = p_item;
uint64_t pre_max = uint_max / 10;
int dig_pre_max = uint_max % 10;
uint64_t number = 0;
Expand Down