Skip to content

BUG: read_csv: fix wrong exception on permissions issue #32737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ I/O
- Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`)
- Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`)
- Bug in :meth:`read_csv` was causing a segfault when there were blank lines between the header and data rows (:issue:`28071`)
- Bug in :meth:`read_csv` was raising a misleading exception on permission issue (:issue:`23784`)


Plotting
Expand Down
18 changes: 2 additions & 16 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ cdef extern from "parser/io.h":
void* buffer_mmap_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status)

void *new_file_source(char *fname, size_t buffer_size)
void *new_file_source(char *fname, size_t buffer_size) except NULL

void *new_rd_source(object obj)
void *new_rd_source(object obj) except NULL

int del_file_source(void *src)
int del_rd_source(void *src)
Expand Down Expand Up @@ -667,26 +667,12 @@ cdef class TextReader:
ptr = new_file_source(source, self.parser.chunksize)
self.parser.cb_io = &buffer_file_bytes
self.parser.cb_cleanup = &del_file_source

if ptr == NULL:
if not os.path.exists(source):

raise FileNotFoundError(
ENOENT,
f'File {usource} does not exist',
usource)
raise IOError('Initializing from file failed')

self.parser.source = ptr

elif hasattr(source, 'read'):
# e.g., StringIO

ptr = new_rd_source(source)
if ptr == NULL:
raise IOError('Initializing parser from file-like '
'object failed')

self.parser.source = ptr
self.parser.cb_io = &buffer_rd_bytes
self.parser.cb_cleanup = &del_rd_source
Expand Down
19 changes: 11 additions & 8 deletions pandas/_libs/src/parser/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The full license is in the LICENSE file, distributed with this software.
void *new_file_source(char *fname, size_t buffer_size) {
file_source *fs = (file_source *)malloc(sizeof(file_source));
if (fs == NULL) {
PyErr_NoMemory();
return NULL;
}

Expand All @@ -41,17 +42,20 @@ void *new_file_source(char *fname, size_t buffer_size) {
int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
if (required == 0) {
free(fs);
PyErr_SetFromWindowsErr(0);
return NULL;
}
wname = (wchar_t*)malloc(required * sizeof(wchar_t));
if (wname == NULL) {
free(fs);
PyErr_NoMemory();
return NULL;
}
if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) <
required) {
free(wname);
free(fs);
PyErr_SetFromWindowsErr(0);
return NULL;
}
fs->fd = _wopen(wname, O_RDONLY | O_BINARY);
Expand All @@ -62,6 +66,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
#endif
if (fs->fd == -1) {
free(fs);
PyErr_SetFromErrnoWithFilename(PyExc_OSError, fname);
return NULL;
}

Expand All @@ -71,6 +76,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
if (fs->buffer == NULL) {
close(fs->fd);
free(fs);
PyErr_NoMemory();
return NULL;
}

Expand All @@ -83,6 +89,10 @@ void *new_file_source(char *fname, size_t buffer_size) {
void *new_rd_source(PyObject *obj) {
rd_source *rds = (rd_source *)malloc(sizeof(rd_source));

if (rds == NULL) {
PyErr_NoMemory();
return NULL;
}
/* hold on to this object */
Py_INCREF(obj);
rds->obj = obj;
Expand Down Expand Up @@ -220,20 +230,15 @@ void *new_mmap(char *fname) {

mm = (memory_map *)malloc(sizeof(memory_map));
if (mm == NULL) {
fprintf(stderr, "new_file_buffer: malloc() failed.\n");
return (NULL);
return NULL;
}
mm->fd = open(fname, O_RDONLY | O_BINARY);
if (mm->fd == -1) {
fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n",
fname, errno);
free(mm);
return NULL;
}

if (fstat(mm->fd, &stat) == -1) {
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n",
errno);
close(mm->fd);
free(mm);
return NULL;
Expand All @@ -242,8 +247,6 @@ void *new_mmap(char *fname) {

mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0);
if (mm->memmap == MAP_FAILED) {
/* XXX Eventually remove this print statement. */
fprintf(stderr, "new_file_buffer: mmap() failed.\n");
close(mm->fd);
free(mm);
return NULL;
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/io/parser/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,13 +960,23 @@ def test_nonexistent_path(all_parsers):
parser = all_parsers
path = f"{tm.rands(10)}.csv"

msg = f"File {path} does not exist" if parser.engine == "c" else r"\[Errno 2\]"
msg = r"\[Errno 2\]"
with pytest.raises(FileNotFoundError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename

filename = e.value.filename

assert path == filename
@td.skip_if_windows # os.chmod does not work in windows
def test_no_permission(all_parsers):
# GH 23784
parser = all_parsers

msg = r"\[Errno 13\]"
with tm.ensure_clean() as path:
os.chmod(path, 0) # make file unreadable
with pytest.raises(PermissionError, match=msg) as e:
parser.read_csv(path)
assert path == e.value.filename


def test_missing_trailing_delimiters(all_parsers):
Expand Down