diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index baf5633db0cb3..a11bf370412d2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -284,21 +284,10 @@ cdef extern from "parser/tokenizer.h": cdef extern from "parser/io.h": - void *new_mmap(char *fname) - int del_mmap(void *src) - void* buffer_mmap_bytes(void *source, size_t nbytes, - size_t *bytes_read, int *status) - - void *new_file_source(char *fname, size_t buffer_size) except NULL - void *new_rd_source(object obj) except NULL - int del_file_source(void *src) int del_rd_source(void *src) - void* buffer_file_bytes(void *source, size_t nbytes, - size_t *bytes_read, int *status) - void* buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status, const char *encoding_errors) diff --git a/pandas/_libs/src/parser/io.c b/pandas/_libs/src/parser/io.c index 449f0b55bff70..2ed0cef3cdc58 100644 --- a/pandas/_libs/src/parser/io.c +++ b/pandas/_libs/src/parser/io.c @@ -9,83 +9,10 @@ The full license is in the LICENSE file, distributed with this software. #include "io.h" -#include -#include - -#ifndef O_BINARY -#define O_BINARY 0 -#endif // O_BINARY - -#ifdef _WIN32 -#define USE_WIN_UTF16 -#include -#endif - /* On-disk FILE, uncompressed */ -void *new_file_source(char *fname, size_t buffer_size) { - file_source *fs = (file_source *)malloc(sizeof(file_source)); - if (fs == NULL) { - PyErr_NoMemory(); - return NULL; - } - -#ifdef USE_WIN_UTF16 - // Fix gh-15086 properly - convert UTF8 to UTF16 that Windows widechar API - // accepts. This is needed because UTF8 might _not_ be convertible to MBCS - // for some conditions, as MBCS is locale-dependent, and not all unicode - // symbols can be expressed in it. - { - wchar_t* wname = NULL; - int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0); - if (required == 0) { - free(fs); - PyErr_SetFromWindowsErr(0); - return NULL; - } - wname = (wchar_t*)malloc(required * sizeof(wchar_t)); - if (wname == NULL) { - free(fs); - PyErr_NoMemory(); - return NULL; - } - if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) < - required) { - free(wname); - free(fs); - PyErr_SetFromWindowsErr(0); - return NULL; - } - fs->fd = _wopen(wname, O_RDONLY | O_BINARY); - free(wname); - } -#else - fs->fd = open(fname, O_RDONLY | O_BINARY); -#endif - if (fs->fd == -1) { - free(fs); - PyErr_SetFromErrnoWithFilename(PyExc_OSError, fname); - return NULL; - } - - // Only allocate this heap memory if we are not memory-mapping the file - fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char)); - - if (fs->buffer == NULL) { - close(fs->fd); - free(fs); - PyErr_NoMemory(); - return NULL; - } - - memset(fs->buffer, '\0', buffer_size + 1); - fs->size = buffer_size; - - return (void *)fs; -} - void *new_rd_source(PyObject *obj) { rd_source *rds = (rd_source *)malloc(sizeof(rd_source)); @@ -108,17 +35,6 @@ void *new_rd_source(PyObject *obj) { */ -int del_file_source(void *ptr) { - file_source *fs = ptr; - if (fs == NULL) return 0; - - free(fs->buffer); - close(fs->fd); - free(fs); - - return 0; -} - int del_rd_source(void *rds) { Py_XDECREF(RDS(rds)->obj); Py_XDECREF(RDS(rds)->buffer); @@ -133,35 +49,6 @@ int del_rd_source(void *rds) { */ -void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, - int *status) { - file_source *fs = FS(source); - ssize_t rv; - - if (nbytes > fs->size) { - nbytes = fs->size; - } - - rv = read(fs->fd, fs->buffer, nbytes); - switch (rv) { - case -1: - *status = CALLING_READ_FAILED; - *bytes_read = 0; - return NULL; - case 0: - *status = REACHED_EOF; - *bytes_read = 0; - return NULL; - default: - *status = 0; - *bytes_read = rv; - fs->buffer[rv] = '\0'; - break; - } - - return (void *)fs->buffer; -} - void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status, const char *encoding_errors) { PyGILState_STATE state; @@ -218,98 +105,3 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, return retval; } - -#ifdef HAVE_MMAP - -#include - -void *new_mmap(char *fname) { - memory_map *mm; - struct stat stat; - size_t filesize; - - mm = (memory_map *)malloc(sizeof(memory_map)); - if (mm == NULL) { - return NULL; - } - mm->fd = open(fname, O_RDONLY | O_BINARY); - if (mm->fd == -1) { - free(mm); - return NULL; - } - - if (fstat(mm->fd, &stat) == -1) { - close(mm->fd); - free(mm); - return NULL; - } - filesize = stat.st_size; /* XXX This might be 32 bits. */ - - mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0); - if (mm->memmap == MAP_FAILED) { - close(mm->fd); - free(mm); - return NULL; - } - - mm->size = (off_t)filesize; - mm->position = 0; - - return mm; -} - -int del_mmap(void *ptr) { - memory_map *mm = ptr; - - if (mm == NULL) return 0; - - munmap(mm->memmap, mm->size); - close(mm->fd); - free(mm); - - return 0; -} - -void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, - int *status) { - void *retval; - memory_map *src = source; - size_t remaining = src->size - src->position; - - if (remaining == 0) { - *bytes_read = 0; - *status = REACHED_EOF; - return NULL; - } - - if (nbytes > remaining) { - nbytes = remaining; - } - - retval = src->memmap + src->position; - - /* advance position in mmap data structure */ - src->position += nbytes; - - *bytes_read = nbytes; - *status = 0; - - return retval; -} - -#else - -/* kludgy */ - -void *new_mmap(char *fname) { return NULL; } - -int del_mmap(void *src) { return 0; } - -/* don't use this! */ - -void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, - int *status) { - return NULL; -} - -#endif // HAVE_MMAP diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h index dbe757b458c54..f0e8b01855304 100644 --- a/pandas/_libs/src/parser/io.h +++ b/pandas/_libs/src/parser/io.h @@ -14,37 +14,8 @@ The full license is in the LICENSE file, distributed with this software. #include #include "tokenizer.h" -typedef struct _file_source { - /* The file being read. */ - int fd; - - char *buffer; - size_t size; -} file_source; - #define FS(source) ((file_source *)source) -#if !defined(_WIN32) && !defined(HAVE_MMAP) -#define HAVE_MMAP -#endif // HAVE_MMAP - -typedef struct _memory_map { - int fd; - - /* Size of the file, in bytes. */ - char *memmap; - size_t size; - - size_t position; -} memory_map; - -void *new_mmap(char *fname); - -int del_mmap(void *src); - -void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, - int *status); - typedef struct _rd_source { PyObject *obj; PyObject *buffer; @@ -53,16 +24,10 @@ typedef struct _rd_source { #define RDS(source) ((rd_source *)source) -void *new_file_source(char *fname, size_t buffer_size); - void *new_rd_source(PyObject *obj); -int del_file_source(void *src); int del_rd_source(void *src); -void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, - int *status); - void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, int *status, const char *encoding_errors);