Skip to content

Commit 2b9b58d

Browse files
jbrockmendel authored and WillAyd committed
BLD: use unsigned instead of signed for lengths, avoid build warnings (#26759)
1 parent a14874f commit 2b9b58d

File tree

3 files changed

+37
-31
lines changed

3 files changed

+37
-31
lines changed

pandas/_libs/parsers.pyx

+9-9
Original file line number | Diff line number | Diff line change
@@ -119,24 +119,24 @@ cdef extern from "parser/tokenizer.h":
119119

120120
# where to write out tokenized data
121121
char *stream
122-
int64_t stream_len
123-
int64_t stream_cap
122+
uint64_t stream_len
123+
uint64_t stream_cap
124124

125125
# Store words in (potentially ragged) matrix for now, hmm
126126
char **words
127127
int64_t *word_starts # where we are in the stream
128-
int64_t words_len
129-
int64_t words_cap
130-
int64_t max_words_cap # maximum word cap encountered
128+
uint64_t words_len
129+
uint64_t words_cap
130+
uint64_t max_words_cap # maximum word cap encountered
131131

132132
char *pword_start # pointer to stream start of current field
133133
int64_t word_start # position start of current field
134134

135135
int64_t *line_start # position in words for start of line
136136
int64_t *line_fields # Number of fields in each line
137-
int64_t lines # Number of lines observed
138-
int64_t file_lines # Number of lines observed (with bad/skipped)
139-
int64_t lines_cap # Vector capacity
137+
uint64_t lines # Number of lines observed
138+
uint64_t file_lines # Number of lines observed (with bad/skipped)
139+
uint64_t lines_cap # Vector capacity
140140

141141
# Tokenizing stuff
142142
ParserState state
@@ -168,7 +168,7 @@ cdef extern from "parser/tokenizer.h":
168168

169169
int header # Boolean: 1: has header, 0: no header
170170
int64_t header_start # header row start
171-
int64_t header_end # header row end
171+
uint64_t header_end # header row end
172172

173173
void *skipset
174174
PyObject *skipfunc

pandas/_libs/src/parser/tokenizer.c

+19-13
Original file line number | Diff line number | Diff line change
@@ -71,9 +71,9 @@ static void free_if_not_null(void **ptr) {
7171
7272
*/
7373

74-
static void *grow_buffer(void *buffer, int64_t length, int64_t *capacity,
74+
static void *grow_buffer(void *buffer, uint64_t length, uint64_t *capacity,
7575
int64_t space, int64_t elsize, int *error) {
76-
int64_t cap = *capacity;
76+
uint64_t cap = *capacity;
7777
void *newbuffer = buffer;
7878

7979
// Can we fit potentially nbytes tokens (+ null terminators) in the stream?
@@ -248,7 +248,7 @@ void parser_del(parser_t *self) {
248248
}
249249

250250
static int make_stream_space(parser_t *self, size_t nbytes) {
251-
int64_t i, cap, length;
251+
uint64_t i, cap, length;
252252
int status;
253253
void *orig_ptr, *newptr;
254254

@@ -263,7 +263,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
263263
("\n\nmake_stream_space: nbytes = %zu. grow_buffer(self->stream...)\n",
264264
nbytes))
265265
self->stream = (char *)grow_buffer((void *)self->stream, self->stream_len,
266-
(int64_t*)&self->stream_cap, nbytes * 2,
266+
&self->stream_cap, nbytes * 2,
267267
sizeof(char), &status);
268268
TRACE(
269269
("make_stream_space: self->stream=%p, self->stream_len = %zu, "
@@ -305,7 +305,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
305305

306306
self->words =
307307
(char **)grow_buffer((void *)self->words, length,
308-
(int64_t*)&self->words_cap, nbytes,
308+
&self->words_cap, nbytes,
309309
sizeof(char *), &status);
310310
TRACE(
311311
("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, "
@@ -336,7 +336,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
336336
cap = self->lines_cap;
337337
self->line_start =
338338
(int64_t *)grow_buffer((void *)self->line_start, self->lines + 1,
339-
(int64_t*)&self->lines_cap, nbytes,
339+
&self->lines_cap, nbytes,
340340
sizeof(int64_t), &status);
341341
TRACE((
342342
"make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n",
@@ -471,7 +471,7 @@ static int end_line(parser_t *self) {
471471
return 0;
472472
}
473473

474-
if (!(self->lines <= (int64_t) self->header_end + 1) &&
474+
if (!(self->lines <= self->header_end + 1) &&
475475
(self->expected_fields < 0 && fields > ex_fields) && !(self->usecols)) {
476476
// increment file line count
477477
self->file_lines++;
@@ -507,7 +507,7 @@ static int end_line(parser_t *self) {
507507
}
508508
} else {
509509
// missing trailing delimiters
510-
if ((self->lines >= (int64_t) self->header_end + 1) &&
510+
if ((self->lines >= self->header_end + 1) &&
511511
fields < ex_fields) {
512512
// might overrun the buffer when closing fields
513513
if (make_stream_space(self, ex_fields - fields) < 0) {
@@ -651,7 +651,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
651651
stream = self->stream + self->stream_len; \
652652
slen = self->stream_len; \
653653
self->state = STATE; \
654-
if (line_limit > 0 && self->lines == start_lines + (int64_t)line_limit) { \
654+
if (line_limit > 0 && self->lines == start_lines + line_limit) { \
655655
goto linelimit; \
656656
}
657657

@@ -666,7 +666,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
666666
stream = self->stream + self->stream_len; \
667667
slen = self->stream_len; \
668668
self->state = STATE; \
669-
if (line_limit > 0 && self->lines == start_lines + (int64_t)line_limit) { \
669+
if (line_limit > 0 && self->lines == start_lines + line_limit) { \
670670
goto linelimit; \
671671
}
672672

@@ -737,7 +737,8 @@ int skip_this_line(parser_t *self, int64_t rownum) {
737737

738738
int tokenize_bytes(parser_t *self,
739739
size_t line_limit, int64_t start_lines) {
740-
int64_t i, slen;
740+
int64_t i;
741+
uint64_t slen;
741742
int should_skip;
742743
char c;
743744
char *stream;
@@ -1203,7 +1204,8 @@ static int parser_handle_eof(parser_t *self) {
12031204
}
12041205

12051206
int parser_consume_rows(parser_t *self, size_t nrows) {
1206-
int64_t i, offset, word_deletions, char_count;
1207+
int64_t offset, word_deletions;
1208+
uint64_t char_count, i;
12071209

12081210
if (nrows > self->lines) {
12091211
nrows = self->lines;
@@ -1229,6 +1231,8 @@ int parser_consume_rows(parser_t *self, size_t nrows) {
12291231
self->stream_len -= char_count;
12301232

12311233
/* move token metadata */
1234+
// Note: We should always have words_len < word_deletions, so this
1235+
// subtraction will remain appropriately-typed.
12321236
for (i = 0; i < self->words_len - word_deletions; ++i) {
12331237
offset = i + word_deletions;
12341238

@@ -1242,6 +1246,8 @@ int parser_consume_rows(parser_t *self, size_t nrows) {
12421246
self->word_start -= char_count;
12431247

12441248
/* move line metadata */
1249+
// Note: We should always have self->lines - nrows + 1 >= 0, so this
1250+
// subtraction will remain appropriately-typed.
12451251
for (i = 0; i < self->lines - nrows + 1; ++i) {
12461252
offset = i + nrows;
12471253
self->line_start[i] = self->line_start[offset] - word_deletions;
@@ -1265,7 +1271,7 @@ int parser_trim_buffers(parser_t *self) {
12651271
size_t new_cap;
12661272
void *newptr;
12671273

1268-
int64_t i;
1274+
uint64_t i;
12691275

12701276
/**
12711277
* Before we free up space and trim, we should

pandas/_libs/src/parser/tokenizer.h

+9-9
Original file line number | Diff line number | Diff line change
@@ -104,24 +104,24 @@ typedef struct parser_t {
104104

105105
// where to write out tokenized data
106106
char *stream;
107-
int64_t stream_len;
108-
int64_t stream_cap;
107+
uint64_t stream_len;
108+
uint64_t stream_cap;
109109

110110
// Store words in (potentially ragged) matrix for now, hmm
111111
char **words;
112112
int64_t *word_starts; // where we are in the stream
113-
int64_t words_len;
114-
int64_t words_cap;
115-
int64_t max_words_cap; // maximum word cap encountered
113+
uint64_t words_len;
114+
uint64_t words_cap;
115+
uint64_t max_words_cap; // maximum word cap encountered
116116

117117
char *pword_start; // pointer to stream start of current field
118118
int64_t word_start; // position start of current field
119119

120120
int64_t *line_start; // position in words for start of line
121121
int64_t *line_fields; // Number of fields in each line
122-
int64_t lines; // Number of (good) lines observed
123-
int64_t file_lines; // Number of lines (including bad or skipped)
124-
int64_t lines_cap; // Vector capacity
122+
uint64_t lines; // Number of (good) lines observed
123+
uint64_t file_lines; // Number of lines (including bad or skipped)
124+
uint64_t lines_cap; // Vector capacity
125125

126126
// Tokenizing stuff
127127
ParserState state;
@@ -153,7 +153,7 @@ typedef struct parser_t {
153153

154154
int header; // Boolean: 1: has header, 0: no header
155155
int64_t header_start; // header row start
156-
int64_t header_end; // header row end
156+
uint64_t header_end; // header row end
157157

158158
void *skipset;
159159
PyObject *skipfunc;

0 commit comments

Comments (0)