Skip to content

Clean up tokenizer / parser files #56274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/_libs/include/pandas/parser/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ typedef struct _rd_source {

void *new_rd_source(PyObject *obj);

int del_rd_source(void *src);
void del_rd_source(void *src);

void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
char *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status, const char *encoding_errors);
6 changes: 3 additions & 3 deletions pandas/_libs/include/pandas/parser/pd_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ typedef struct {
int (*to_double)(char *, double *, char, char, int *);
int (*floatify)(PyObject *, double *, int *);
void *(*new_rd_source)(PyObject *);
int (*del_rd_source)(void *);
void *(*buffer_rd_bytes)(void *, size_t, size_t *, int *, const char *);
void (*del_rd_source)(void *);
char *(*buffer_rd_bytes)(void *, size_t, size_t *, int *, const char *);
void (*uint_state_init)(uint_state *);
int (*uint64_conflict)(uint_state *);
void (*coliter_setup)(coliter_t *, parser_t *, int64_t, int64_t);
Expand All @@ -30,7 +30,7 @@ typedef struct {
void (*parser_free)(parser_t *);
void (*parser_del)(parser_t *);
int (*parser_add_skiprow)(parser_t *, int64_t);
int (*parser_set_skipfirstnrows)(parser_t *, int64_t);
void (*parser_set_skipfirstnrows)(parser_t *, int64_t);
void (*parser_set_default_options)(parser_t *);
int (*parser_consume_rows)(parser_t *, size_t);
int (*parser_trim_buffers)(parser_t *);
Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/include/pandas/parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ typedef enum {

typedef enum { ERROR, WARN, SKIP } BadLineHandleMethod;

typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
typedef char *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
int *status, const char *encoding_errors);
typedef int (*io_cleanup)(void *src);
typedef void (*io_cleanup)(void *src);

typedef struct parser_t {
void *source;
Expand Down Expand Up @@ -187,7 +187,7 @@ int parser_trim_buffers(parser_t *self);

int parser_add_skiprow(parser_t *self, int64_t row);

int parser_set_skipfirstnrows(parser_t *self, int64_t nrows);
void parser_set_skipfirstnrows(parser_t *self, int64_t nrows);

void parser_free(parser_t *self);

Expand Down
16 changes: 8 additions & 8 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ cdef extern from "pandas/parser/tokenizer.h":
WARN,
SKIP

ctypedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
ctypedef char* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
int *status, const char *encoding_errors)
ctypedef int (*io_cleanup)(void *src)
ctypedef void (*io_cleanup)(void *src)

ctypedef struct parser_t:
void *source
Expand Down Expand Up @@ -247,9 +247,9 @@ cdef extern from "pandas/parser/tokenizer.h":
cdef extern from "pandas/parser/pd_parser.h":
void *new_rd_source(object obj) except NULL

int del_rd_source(void *src)
void del_rd_source(void *src)

void* buffer_rd_bytes(void *source, size_t nbytes,
char* buffer_rd_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status, const char *encoding_errors)

void uint_state_init(uint_state *self)
Expand All @@ -266,7 +266,7 @@ cdef extern from "pandas/parser/pd_parser.h":
void parser_del(parser_t *self) nogil
int parser_add_skiprow(parser_t *self, int64_t row)

int parser_set_skipfirstnrows(parser_t *self, int64_t nrows)
void parser_set_skipfirstnrows(parser_t *self, int64_t nrows)

void parser_set_default_options(parser_t *self)

Expand Down Expand Up @@ -318,13 +318,13 @@ cdef double round_trip_wrapper(const char *p, char **q, char decimal,
return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)


cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
cdef char* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
size_t *bytes_read, int *status,
const char *encoding_errors) noexcept:
return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors)

cdef int del_rd_source_wrapper(void *src) noexcept:
return del_rd_source(src)
cdef void del_rd_source_wrapper(void *src) noexcept:
del_rd_source(src)


cdef class TextReader:
Expand Down
26 changes: 9 additions & 17 deletions pandas/_libs/src/parser/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,10 @@ void *new_rd_source(PyObject *obj) {

*/

int del_rd_source(void *rds) {
void del_rd_source(void *rds) {
Py_XDECREF(RDS(rds)->obj);
Py_XDECREF(RDS(rds)->buffer);
free(rds);

return 0;
}

/*
Expand All @@ -49,26 +47,20 @@ int del_rd_source(void *rds) {

*/

void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
char *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
int *status, const char *encoding_errors) {
PyGILState_STATE state;
PyObject *result, *func, *args, *tmp;

void *retval;

size_t length;
rd_source *src = RDS(source);
state = PyGILState_Ensure();
PyGILState_STATE state = PyGILState_Ensure();

/* delete old object */
Py_XDECREF(src->buffer);
src->buffer = NULL;
args = Py_BuildValue("(i)", nbytes);
PyObject *args = Py_BuildValue("(i)", nbytes);

func = PyObject_GetAttrString(src->obj, "read");
PyObject *func = PyObject_GetAttrString(src->obj, "read");

/* Note: PyObject_CallObject requires the GIL */
result = PyObject_CallObject(func, args);
PyObject *result = PyObject_CallObject(func, args);
Py_XDECREF(args);
Py_XDECREF(func);

Expand All @@ -78,7 +70,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
*status = CALLING_READ_FAILED;
return NULL;
} else if (!PyBytes_Check(result)) {
tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors);
PyObject *tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors);
Py_DECREF(result);
if (tmp == NULL) {
PyGILState_Release(state);
Expand All @@ -87,7 +79,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
result = tmp;
}

length = PySequence_Length(result);
const size_t length = PySequence_Length(result);

if (length == 0)
*status = REACHED_EOF;
Expand All @@ -96,7 +88,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,

/* hang on to the Python object */
src->buffer = result;
retval = (void *)PyBytes_AsString(result);
char *retval = PyBytes_AsString(result);

PyGILState_Release(state);

Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/src/parser/pd_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ static int to_double(char *item, double *p_value, char sci, char decimal,
}

static int floatify(PyObject *str, double *result, int *maybe_int) {
int status;
char *data;
PyObject *tmp = NULL;
const char sci = 'E';
Expand All @@ -43,7 +42,7 @@ static int floatify(PyObject *str, double *result, int *maybe_int) {
return -1;
}

status = to_double(data, result, sci, dec, maybe_int);
const int status = to_double(data, result, sci, dec, maybe_int);

if (!status) {
/* handle inf/-inf infinity/-infinity */
Expand Down
Loading