Skip to content

Commit dec5211

Browse files
roberthdevries, jreback, gfyoung
authored
BUG: read_csv: fix wrong exception on permissions issue (#32737)
* Generate exception from the C code in the proper manner
  Get rid of all error printf's and produce proper Python exceptions
* Declare some more exceptions from C code
* Remove special case error message for c parser
* Add whatsnew entry
* Fix missing semicolons
* Add regression test
* Remove special case handling for Windows
  PyErr_SetFromErrnoWithFilename works for Unix and Windows
* Remove call to GetLastError(), when using 0, the python error code handles this
* black fixes
* Fix indentation of assert statement (also in previous test, same error)
* Skip the test on windows
* Fix black issue
* Let new_mmap fail without exception to allow fallback
* Do not create a python error in new_mmap to allow the fallback to work silently
* Remove the NULL pointer check for new_rd_source now that it will raise an exception
* Update doc/source/whatsnew/v1.1.0.rst

Co-Authored-By: gfyoung <[email protected]>
Co-authored-by: Jeff Reback <[email protected]>
Co-authored-by: gfyoung <[email protected]>
1 parent 80078ac commit dec5211

File tree

4 files changed

+27
-27
lines changed

4 files changed

+27
-27
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ I/O
348348
- Bug in :meth:`read_excel` where a UTF-8 string with a high surrogate would cause a segmentation violation (:issue:`23809`)
349349
- Bug in :meth:`read_csv` was causing a file descriptor leak on an empty file (:issue:`31488`)
350350
- Bug in :meth:`read_csv` was causing a segfault when there were blank lines between the header and data rows (:issue:`28071`)
351+
- Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`)
351352

352353

353354
Plotting

pandas/_libs/parsers.pyx

+2-16
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,9 @@ cdef extern from "parser/io.h":
241241
void* buffer_mmap_bytes(void *source, size_t nbytes,
242242
size_t *bytes_read, int *status)
243243

244-
void *new_file_source(char *fname, size_t buffer_size)
244+
void *new_file_source(char *fname, size_t buffer_size) except NULL
245245

246-
void *new_rd_source(object obj)
246+
void *new_rd_source(object obj) except NULL
247247

248248
int del_file_source(void *src)
249249
int del_rd_source(void *src)
@@ -667,26 +667,12 @@ cdef class TextReader:
667667
ptr = new_file_source(source, self.parser.chunksize)
668668
self.parser.cb_io = &buffer_file_bytes
669669
self.parser.cb_cleanup = &del_file_source
670-
671-
if ptr == NULL:
672-
if not os.path.exists(source):
673-
674-
raise FileNotFoundError(
675-
ENOENT,
676-
f'File {usource} does not exist',
677-
usource)
678-
raise IOError('Initializing from file failed')
679-
680670
self.parser.source = ptr
681671

682672
elif hasattr(source, 'read'):
683673
# e.g., StringIO
684674

685675
ptr = new_rd_source(source)
686-
if ptr == NULL:
687-
raise IOError('Initializing parser from file-like '
688-
'object failed')
689-
690676
self.parser.source = ptr
691677
self.parser.cb_io = &buffer_rd_bytes
692678
self.parser.cb_cleanup = &del_rd_source

pandas/_libs/src/parser/io.c

+11-8
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ The full license is in the LICENSE file, distributed with this software.
2828
void *new_file_source(char *fname, size_t buffer_size) {
2929
file_source *fs = (file_source *)malloc(sizeof(file_source));
3030
if (fs == NULL) {
31+
PyErr_NoMemory();
3132
return NULL;
3233
}
3334

@@ -41,17 +42,20 @@ void *new_file_source(char *fname, size_t buffer_size) {
4142
int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
4243
if (required == 0) {
4344
free(fs);
45+
PyErr_SetFromWindowsErr(0);
4446
return NULL;
4547
}
4648
wname = (wchar_t*)malloc(required * sizeof(wchar_t));
4749
if (wname == NULL) {
4850
free(fs);
51+
PyErr_NoMemory();
4952
return NULL;
5053
}
5154
if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) <
5255
required) {
5356
free(wname);
5457
free(fs);
58+
PyErr_SetFromWindowsErr(0);
5559
return NULL;
5660
}
5761
fs->fd = _wopen(wname, O_RDONLY | O_BINARY);
@@ -62,6 +66,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
6266
#endif
6367
if (fs->fd == -1) {
6468
free(fs);
69+
PyErr_SetFromErrnoWithFilename(PyExc_OSError, fname);
6570
return NULL;
6671
}
6772

@@ -71,6 +76,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
7176
if (fs->buffer == NULL) {
7277
close(fs->fd);
7378
free(fs);
79+
PyErr_NoMemory();
7480
return NULL;
7581
}
7682

@@ -83,6 +89,10 @@ void *new_file_source(char *fname, size_t buffer_size) {
8389
void *new_rd_source(PyObject *obj) {
8490
rd_source *rds = (rd_source *)malloc(sizeof(rd_source));
8591

92+
if (rds == NULL) {
93+
PyErr_NoMemory();
94+
return NULL;
95+
}
8696
/* hold on to this object */
8797
Py_INCREF(obj);
8898
rds->obj = obj;
@@ -220,20 +230,15 @@ void *new_mmap(char *fname) {
220230

221231
mm = (memory_map *)malloc(sizeof(memory_map));
222232
if (mm == NULL) {
223-
fprintf(stderr, "new_file_buffer: malloc() failed.\n");
224-
return (NULL);
233+
return NULL;
225234
}
226235
mm->fd = open(fname, O_RDONLY | O_BINARY);
227236
if (mm->fd == -1) {
228-
fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n",
229-
fname, errno);
230237
free(mm);
231238
return NULL;
232239
}
233240

234241
if (fstat(mm->fd, &stat) == -1) {
235-
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n",
236-
errno);
237242
close(mm->fd);
238243
free(mm);
239244
return NULL;
@@ -242,8 +247,6 @@ void *new_mmap(char *fname) {
242247

243248
mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0);
244249
if (mm->memmap == MAP_FAILED) {
245-
/* XXX Eventually remove this print statement. */
246-
fprintf(stderr, "new_file_buffer: mmap() failed.\n");
247250
close(mm->fd);
248251
free(mm);
249252
return NULL;

pandas/tests/io/parser/test_common.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -960,13 +960,23 @@ def test_nonexistent_path(all_parsers):
960960
parser = all_parsers
961961
path = f"{tm.rands(10)}.csv"
962962

963-
msg = f"File {path} does not exist" if parser.engine == "c" else r"\[Errno 2\]"
963+
msg = r"\[Errno 2\]"
964964
with pytest.raises(FileNotFoundError, match=msg) as e:
965965
parser.read_csv(path)
966+
assert path == e.value.filename
966967

967-
filename = e.value.filename
968968

969-
assert path == filename
969+
@td.skip_if_windows # os.chmod does not work in windows
970+
def test_no_permission(all_parsers):
971+
# GH 23784
972+
parser = all_parsers
973+
974+
msg = r"\[Errno 13\]"
975+
with tm.ensure_clean() as path:
976+
os.chmod(path, 0) # make file unreadable
977+
with pytest.raises(PermissionError, match=msg) as e:
978+
parser.read_csv(path)
979+
assert path == e.value.filename
970980

971981

972982
def test_missing_trailing_delimiters(all_parsers):

0 commit comments

Comments
 (0)