-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: csv parsing with EOF byte on windows, Revert gh-16039 #16663
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,40 +9,33 @@ The full license is in the LICENSE file, distributed with this software. | |
|
||
#include "io.h" | ||
|
||
#include <sys/types.h> | ||
#include <sys/stat.h> | ||
#include <fcntl.h> | ||
|
||
/* | ||
On-disk FILE, uncompressed | ||
*/ | ||
|
||
void *new_file_source(char *fname, size_t buffer_size) { | ||
file_source *fs = (file_source *)malloc(sizeof(file_source)); | ||
if (fs == NULL) { | ||
fs->fp = fopen(fname, "rb"); | ||
|
||
if (fs->fp == NULL) { | ||
free(fs); | ||
return NULL; | ||
} | ||
setbuf(fs->fp, NULL); | ||
|
||
fs->fd = open(fname, O_RDONLY); | ||
if (fs->fd == -1) { | ||
goto err_free; | ||
} | ||
fs->initial_file_pos = ftell(fs->fp); | ||
|
||
// Only allocate this heap memory if we are not memory-mapping the file | ||
fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char)); | ||
|
||
if (fs->buffer == NULL) { | ||
goto err_free; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't you need to free fs here? (what about closing the fh)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, those would both be good to do here, if possible. |
||
return NULL; | ||
} | ||
|
||
memset(fs->buffer, '\0', buffer_size + 1); | ||
fs->size = buffer_size; | ||
memset(fs->buffer, 0, buffer_size + 1); | ||
fs->buffer[buffer_size] = '\0'; | ||
|
||
return (void *)fs; | ||
|
||
err_free: | ||
free(fs); | ||
return NULL; | ||
} | ||
|
||
void *new_rd_source(PyObject *obj) { | ||
|
@@ -63,12 +56,12 @@ void *new_rd_source(PyObject *obj) { | |
|
||
*/ | ||
|
||
int del_file_source(void *ptr) { | ||
file_source *fs = ptr; | ||
int del_file_source(void *fs) { | ||
if (fs == NULL) return 0; | ||
|
||
free(fs->buffer); | ||
close(fs->fd); | ||
/* allocated on the heap */ | ||
free(FS(fs)->buffer); | ||
fclose(FS(fs)->fp); | ||
free(fs); | ||
|
||
return 0; | ||
|
@@ -90,31 +83,17 @@ int del_rd_source(void *rds) { | |
|
||
void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, | ||
int *status) { | ||
file_source *fs = FS(source); | ||
ssize_t rv; | ||
file_source *src = FS(source); | ||
|
||
if (nbytes > fs->size) { | ||
nbytes = fs->size; | ||
} | ||
*bytes_read = fread((void *)src->buffer, sizeof(char), nbytes, src->fp); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this only read min(bytes left, nbytes)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that is correct. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty sure we'll hit end-of-file first, and it will terminate then, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the file or the remainder of the file is shorter than fs->size, yes. that isn't the only situation say you have a 1MB file, and There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, got it. I'll add that to my follow-up PR then. |
||
|
||
rv = read(fs->fd, fs->buffer, nbytes); | ||
switch (rv) { | ||
case -1: | ||
*status = CALLING_READ_FAILED; | ||
*bytes_read = 0; | ||
return NULL; | ||
case 0: | ||
if (*bytes_read == 0) { | ||
*status = REACHED_EOF; | ||
*bytes_read = 0; | ||
return NULL; | ||
default: | ||
} else { | ||
*status = 0; | ||
*bytes_read = rv; | ||
fs->buffer[rv] = '\0'; | ||
break; | ||
} | ||
|
||
return (void *)fs->buffer; | ||
return (void *)src->buffer; | ||
} | ||
|
||
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, | ||
|
@@ -173,86 +152,81 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, | |
#ifdef HAVE_MMAP | ||
|
||
#include <sys/mman.h> | ||
#include <sys/stat.h> | ||
|
||
void *new_mmap(char *fname) { | ||
struct stat buf; | ||
int fd; | ||
memory_map *mm; | ||
struct stat stat; | ||
size_t filesize; | ||
off_t filesize; | ||
|
||
mm = (memory_map *)malloc(sizeof(memory_map)); | ||
mm->fp = fopen(fname, "rb"); | ||
|
||
fd = fileno(mm->fp); | ||
if (fstat(fd, &buf) == -1) { | ||
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", errno); | ||
return NULL; | ||
} | ||
filesize = buf.st_size; /* XXX This might be 32 bits. */ | ||
|
||
if (mm == NULL) { | ||
/* XXX Eventually remove this print statement. */ | ||
fprintf(stderr, "new_file_buffer: malloc() failed.\n"); | ||
return (NULL); | ||
} | ||
mm->fd = open(fname, O_RDONLY); | ||
if (mm->fd == -1) { | ||
fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n", | ||
fname, errno); | ||
goto err_free; | ||
return NULL; | ||
} | ||
mm->size = (off_t)filesize; | ||
mm->line_number = 0; | ||
|
||
if (fstat(mm->fd, &stat) == -1) { | ||
fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", | ||
errno); | ||
goto err_close; | ||
} | ||
filesize = stat.st_size; /* XXX This might be 32 bits. */ | ||
mm->fileno = fd; | ||
mm->position = ftell(mm->fp); | ||
mm->last_pos = (off_t)filesize; | ||
|
||
mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0); | ||
if (mm->memmap == MAP_FAILED) { | ||
mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0); | ||
if (mm->memmap == NULL) { | ||
/* XXX Eventually remove this print statement. */ | ||
fprintf(stderr, "new_file_buffer: mmap() failed.\n"); | ||
goto err_close; | ||
free(mm); | ||
mm = NULL; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't just return NULL here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, you can just return |
||
} | ||
|
||
mm->size = (off_t)filesize; | ||
mm->position = 0; | ||
|
||
return mm; | ||
|
||
err_close: | ||
close(mm->fd); | ||
err_free: | ||
free(mm); | ||
return NULL; | ||
return (void *)mm; | ||
} | ||
|
||
int del_mmap(void *ptr) { | ||
memory_map *mm = ptr; | ||
|
||
if (mm == NULL) return 0; | ||
int del_mmap(void *src) { | ||
munmap(MM(src)->memmap, MM(src)->size); | ||
|
||
munmap(mm->memmap, mm->size); | ||
close(mm->fd); | ||
free(mm); | ||
fclose(MM(src)->fp); | ||
free(src); | ||
|
||
return 0; | ||
} | ||
|
||
void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, | ||
int *status) { | ||
void *retval; | ||
memory_map *src = source; | ||
size_t remaining = src->size - src->position; | ||
memory_map *src = MM(source); | ||
|
||
if (remaining == 0) { | ||
if (src->position == src->last_pos) { | ||
*bytes_read = 0; | ||
*status = REACHED_EOF; | ||
return NULL; | ||
} | ||
|
||
if (nbytes > remaining) { | ||
nbytes = remaining; | ||
} | ||
|
||
retval = src->memmap + src->position; | ||
|
||
/* advance position in mmap data structure */ | ||
src->position += nbytes; | ||
if (src->position + (off_t)nbytes > src->last_pos) { | ||
// fewer than nbytes remaining | ||
*bytes_read = src->last_pos - src->position; | ||
} else { | ||
*bytes_read = nbytes; | ||
} | ||
|
||
*bytes_read = nbytes; | ||
*status = 0; | ||
|
||
/* advance position in mmap data structure */ | ||
src->position += *bytes_read; | ||
|
||
return retval; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fs can still be a NULL pointer no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it can be on some architectures. We should protect against that.