Skip to content

Commit 958e7f5

Browse files
authored
Clean up tokenizer / parser files (#56274)
* Clean up tokenizer / parser files
* static_assert fix
1 parent fa07d05 commit 958e7f5

File tree

7 files changed

+139
-216
lines changed

pandas/_libs/include/pandas/parser/io.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ typedef struct _rd_source {
2525

2626
void *new_rd_source(PyObject *obj);
2727

28-
int del_rd_source(void *src);
28+
void del_rd_source(void *src);
2929

30-
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
30+
char *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
3131
int *status, const char *encoding_errors);

pandas/_libs/include/pandas/parser/pd_parser.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ typedef struct {
2020
int (*to_double)(char *, double *, char, char, int *);
2121
int (*floatify)(PyObject *, double *, int *);
2222
void *(*new_rd_source)(PyObject *);
23-
int (*del_rd_source)(void *);
24-
void *(*buffer_rd_bytes)(void *, size_t, size_t *, int *, const char *);
23+
void (*del_rd_source)(void *);
24+
char *(*buffer_rd_bytes)(void *, size_t, size_t *, int *, const char *);
2525
void (*uint_state_init)(uint_state *);
2626
int (*uint64_conflict)(uint_state *);
2727
void (*coliter_setup)(coliter_t *, parser_t *, int64_t, int64_t);
@@ -30,7 +30,7 @@ typedef struct {
3030
void (*parser_free)(parser_t *);
3131
void (*parser_del)(parser_t *);
3232
int (*parser_add_skiprow)(parser_t *, int64_t);
33-
int (*parser_set_skipfirstnrows)(parser_t *, int64_t);
33+
void (*parser_set_skipfirstnrows)(parser_t *, int64_t);
3434
void (*parser_set_default_options)(parser_t *);
3535
int (*parser_consume_rows)(parser_t *, size_t);
3636
int (*parser_trim_buffers)(parser_t *);

pandas/_libs/include/pandas/parser/tokenizer.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ typedef enum {
8383

8484
typedef enum { ERROR, WARN, SKIP } BadLineHandleMethod;
8585

86-
typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
86+
typedef char *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
8787
int *status, const char *encoding_errors);
88-
typedef int (*io_cleanup)(void *src);
88+
typedef void (*io_cleanup)(void *src);
8989

9090
typedef struct parser_t {
9191
void *source;
@@ -186,7 +186,7 @@ int parser_trim_buffers(parser_t *self);
186186

187187
int parser_add_skiprow(parser_t *self, int64_t row);
188188

189-
int parser_set_skipfirstnrows(parser_t *self, int64_t nrows);
189+
void parser_set_skipfirstnrows(parser_t *self, int64_t nrows);
190190

191191
void parser_free(parser_t *self);
192192

pandas/_libs/parsers.pyx

+8-8
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,9 @@ cdef extern from "pandas/parser/tokenizer.h":
152152
WARN,
153153
SKIP
154154

155-
ctypedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
155+
ctypedef char* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
156156
int *status, const char *encoding_errors)
157-
ctypedef int (*io_cleanup)(void *src)
157+
ctypedef void (*io_cleanup)(void *src)
158158

159159
ctypedef struct parser_t:
160160
void *source
@@ -247,9 +247,9 @@ cdef extern from "pandas/parser/tokenizer.h":
247247
cdef extern from "pandas/parser/pd_parser.h":
248248
void *new_rd_source(object obj) except NULL
249249

250-
int del_rd_source(void *src)
250+
void del_rd_source(void *src)
251251

252-
void* buffer_rd_bytes(void *source, size_t nbytes,
252+
char* buffer_rd_bytes(void *source, size_t nbytes,
253253
size_t *bytes_read, int *status, const char *encoding_errors)
254254

255255
void uint_state_init(uint_state *self)
@@ -266,7 +266,7 @@ cdef extern from "pandas/parser/pd_parser.h":
266266
void parser_del(parser_t *self) nogil
267267
int parser_add_skiprow(parser_t *self, int64_t row)
268268

269-
int parser_set_skipfirstnrows(parser_t *self, int64_t nrows)
269+
void parser_set_skipfirstnrows(parser_t *self, int64_t nrows)
270270

271271
void parser_set_default_options(parser_t *self)
272272

@@ -318,13 +318,13 @@ cdef double round_trip_wrapper(const char *p, char **q, char decimal,
318318
return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
319319

320320

321-
cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
321+
cdef char* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
322322
size_t *bytes_read, int *status,
323323
const char *encoding_errors) noexcept:
324324
return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors)
325325

326-
cdef int del_rd_source_wrapper(void *src) noexcept:
327-
return del_rd_source(src)
326+
cdef void del_rd_source_wrapper(void *src) noexcept:
327+
del_rd_source(src)
328328

329329

330330
cdef class TextReader:

pandas/_libs/src/parser/io.c

+9-17
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,10 @@ void *new_rd_source(PyObject *obj) {
3535
3636
*/
3737

38-
int del_rd_source(void *rds) {
38+
void del_rd_source(void *rds) {
3939
Py_XDECREF(RDS(rds)->obj);
4040
Py_XDECREF(RDS(rds)->buffer);
4141
free(rds);
42-
43-
return 0;
4442
}
4543

4644
/*
@@ -49,26 +47,20 @@ int del_rd_source(void *rds) {
4947
5048
*/
5149

52-
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
50+
char *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
5351
int *status, const char *encoding_errors) {
54-
PyGILState_STATE state;
55-
PyObject *result, *func, *args, *tmp;
56-
57-
void *retval;
58-
59-
size_t length;
6052
rd_source *src = RDS(source);
61-
state = PyGILState_Ensure();
53+
PyGILState_STATE state = PyGILState_Ensure();
6254

6355
/* delete old object */
6456
Py_XDECREF(src->buffer);
6557
src->buffer = NULL;
66-
args = Py_BuildValue("(i)", nbytes);
58+
PyObject *args = Py_BuildValue("(i)", nbytes);
6759

68-
func = PyObject_GetAttrString(src->obj, "read");
60+
PyObject *func = PyObject_GetAttrString(src->obj, "read");
6961

7062
/* Note: PyObject_CallObject requires the GIL */
71-
result = PyObject_CallObject(func, args);
63+
PyObject *result = PyObject_CallObject(func, args);
7264
Py_XDECREF(args);
7365
Py_XDECREF(func);
7466

@@ -78,7 +70,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
7870
*status = CALLING_READ_FAILED;
7971
return NULL;
8072
} else if (!PyBytes_Check(result)) {
81-
tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors);
73+
PyObject *tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors);
8274
Py_DECREF(result);
8375
if (tmp == NULL) {
8476
PyGILState_Release(state);
@@ -87,7 +79,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
8779
result = tmp;
8880
}
8981

90-
length = PySequence_Length(result);
82+
const size_t length = PySequence_Length(result);
9183

9284
if (length == 0)
9385
*status = REACHED_EOF;
@@ -96,7 +88,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
9688

9789
/* hang on to the Python object */
9890
src->buffer = result;
99-
retval = (void *)PyBytes_AsString(result);
91+
char *retval = PyBytes_AsString(result);
10092

10193
PyGILState_Release(state);
10294

pandas/_libs/src/parser/pd_parser.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ static int to_double(char *item, double *p_value, char sci, char decimal,
2424
}
2525

2626
static int floatify(PyObject *str, double *result, int *maybe_int) {
27-
int status;
2827
char *data;
2928
PyObject *tmp = NULL;
3029
const char sci = 'E';
@@ -43,7 +42,7 @@ static int floatify(PyObject *str, double *result, int *maybe_int) {
4342
return -1;
4443
}
4544

46-
status = to_double(data, result, sci, dec, maybe_int);
45+
const int status = to_double(data, result, sci, dec, maybe_int);
4746

4847
if (!status) {
4948
/* handle inf/-inf infinity/-infinity */

0 commit comments

Comments
 (0)