Skip to content

Commit 348d43f

Browse files
authored
CLN: remove unused file opening and mmap code from parsers.pyx (#40431)
1 parent 9cc9053 commit 348d43f

File tree

3 files changed, with 0 additions and 254 deletions.

pandas/_libs/parsers.pyx

-11
Original file line number | Diff line number | Diff line change
@@ -284,21 +284,10 @@ cdef extern from "parser/tokenizer.h":
284284

285285

286286
cdef extern from "parser/io.h":
287-
void *new_mmap(char *fname)
288-
int del_mmap(void *src)
289-
void* buffer_mmap_bytes(void *source, size_t nbytes,
290-
size_t *bytes_read, int *status)
291-
292-
void *new_file_source(char *fname, size_t buffer_size) except NULL
293-
294287
void *new_rd_source(object obj) except NULL
295288

296-
int del_file_source(void *src)
297289
int del_rd_source(void *src)
298290

299-
void* buffer_file_bytes(void *source, size_t nbytes,
300-
size_t *bytes_read, int *status)
301-
302291
void* buffer_rd_bytes(void *source, size_t nbytes,
303292
size_t *bytes_read, int *status, const char *encoding_errors)
304293

pandas/_libs/src/parser/io.c

-208
Original file line number | Diff line number | Diff line change
@@ -9,83 +9,10 @@ The full license is in the LICENSE file, distributed with this software.
99

1010
#include "io.h"
1111

12-
#include <sys/stat.h>
13-
#include <fcntl.h>
14-
15-
#ifndef O_BINARY
16-
#define O_BINARY 0
17-
#endif // O_BINARY
18-
19-
#ifdef _WIN32
20-
#define USE_WIN_UTF16
21-
#include <Windows.h>
22-
#endif
23-
2412
/*
2513
On-disk FILE, uncompressed
2614
*/
2715

28-
void *new_file_source(char *fname, size_t buffer_size) {
29-
file_source *fs = (file_source *)malloc(sizeof(file_source));
30-
if (fs == NULL) {
31-
PyErr_NoMemory();
32-
return NULL;
33-
}
34-
35-
#ifdef USE_WIN_UTF16
36-
// Fix gh-15086 properly - convert UTF8 to UTF16 that Windows widechar API
37-
// accepts. This is needed because UTF8 might _not_ be convertible to MBCS
38-
// for some conditions, as MBCS is locale-dependent, and not all unicode
39-
// symbols can be expressed in it.
40-
{
41-
wchar_t* wname = NULL;
42-
int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
43-
if (required == 0) {
44-
free(fs);
45-
PyErr_SetFromWindowsErr(0);
46-
return NULL;
47-
}
48-
wname = (wchar_t*)malloc(required * sizeof(wchar_t));
49-
if (wname == NULL) {
50-
free(fs);
51-
PyErr_NoMemory();
52-
return NULL;
53-
}
54-
if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) <
55-
required) {
56-
free(wname);
57-
free(fs);
58-
PyErr_SetFromWindowsErr(0);
59-
return NULL;
60-
}
61-
fs->fd = _wopen(wname, O_RDONLY | O_BINARY);
62-
free(wname);
63-
}
64-
#else
65-
fs->fd = open(fname, O_RDONLY | O_BINARY);
66-
#endif
67-
if (fs->fd == -1) {
68-
free(fs);
69-
PyErr_SetFromErrnoWithFilename(PyExc_OSError, fname);
70-
return NULL;
71-
}
72-
73-
// Only allocate this heap memory if we are not memory-mapping the file
74-
fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char));
75-
76-
if (fs->buffer == NULL) {
77-
close(fs->fd);
78-
free(fs);
79-
PyErr_NoMemory();
80-
return NULL;
81-
}
82-
83-
memset(fs->buffer, '\0', buffer_size + 1);
84-
fs->size = buffer_size;
85-
86-
return (void *)fs;
87-
}
88-
8916
void *new_rd_source(PyObject *obj) {
9017
rd_source *rds = (rd_source *)malloc(sizeof(rd_source));
9118

@@ -108,17 +35,6 @@ void *new_rd_source(PyObject *obj) {
10835
10936
*/
11037

111-
int del_file_source(void *ptr) {
112-
file_source *fs = ptr;
113-
if (fs == NULL) return 0;
114-
115-
free(fs->buffer);
116-
close(fs->fd);
117-
free(fs);
118-
119-
return 0;
120-
}
121-
12238
int del_rd_source(void *rds) {
12339
Py_XDECREF(RDS(rds)->obj);
12440
Py_XDECREF(RDS(rds)->buffer);
@@ -133,35 +49,6 @@ int del_rd_source(void *rds) {
13349
13450
*/
13551

136-
void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
137-
int *status) {
138-
file_source *fs = FS(source);
139-
ssize_t rv;
140-
141-
if (nbytes > fs->size) {
142-
nbytes = fs->size;
143-
}
144-
145-
rv = read(fs->fd, fs->buffer, nbytes);
146-
switch (rv) {
147-
case -1:
148-
*status = CALLING_READ_FAILED;
149-
*bytes_read = 0;
150-
return NULL;
151-
case 0:
152-
*status = REACHED_EOF;
153-
*bytes_read = 0;
154-
return NULL;
155-
default:
156-
*status = 0;
157-
*bytes_read = rv;
158-
fs->buffer[rv] = '\0';
159-
break;
160-
}
161-
162-
return (void *)fs->buffer;
163-
}
164-
16552
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
16653
int *status, const char *encoding_errors) {
16754
PyGILState_STATE state;
@@ -218,98 +105,3 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
218105

219106
return retval;
220107
}
221-
222-
#ifdef HAVE_MMAP
223-
224-
#include <sys/mman.h>
225-
226-
void *new_mmap(char *fname) {
227-
memory_map *mm;
228-
struct stat stat;
229-
size_t filesize;
230-
231-
mm = (memory_map *)malloc(sizeof(memory_map));
232-
if (mm == NULL) {
233-
return NULL;
234-
}
235-
mm->fd = open(fname, O_RDONLY | O_BINARY);
236-
if (mm->fd == -1) {
237-
free(mm);
238-
return NULL;
239-
}
240-
241-
if (fstat(mm->fd, &stat) == -1) {
242-
close(mm->fd);
243-
free(mm);
244-
return NULL;
245-
}
246-
filesize = stat.st_size; /* XXX This might be 32 bits. */
247-
248-
mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0);
249-
if (mm->memmap == MAP_FAILED) {
250-
close(mm->fd);
251-
free(mm);
252-
return NULL;
253-
}
254-
255-
mm->size = (off_t)filesize;
256-
mm->position = 0;
257-
258-
return mm;
259-
}
260-
261-
int del_mmap(void *ptr) {
262-
memory_map *mm = ptr;
263-
264-
if (mm == NULL) return 0;
265-
266-
munmap(mm->memmap, mm->size);
267-
close(mm->fd);
268-
free(mm);
269-
270-
return 0;
271-
}
272-
273-
void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
274-
int *status) {
275-
void *retval;
276-
memory_map *src = source;
277-
size_t remaining = src->size - src->position;
278-
279-
if (remaining == 0) {
280-
*bytes_read = 0;
281-
*status = REACHED_EOF;
282-
return NULL;
283-
}
284-
285-
if (nbytes > remaining) {
286-
nbytes = remaining;
287-
}
288-
289-
retval = src->memmap + src->position;
290-
291-
/* advance position in mmap data structure */
292-
src->position += nbytes;
293-
294-
*bytes_read = nbytes;
295-
*status = 0;
296-
297-
return retval;
298-
}
299-
300-
#else
301-
302-
/* kludgy */
303-
304-
void *new_mmap(char *fname) { return NULL; }
305-
306-
int del_mmap(void *src) { return 0; }
307-
308-
/* don't use this! */
309-
310-
void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
311-
int *status) {
312-
return NULL;
313-
}
314-
315-
#endif // HAVE_MMAP

pandas/_libs/src/parser/io.h

-35
Original file line number | Diff line number | Diff line change
@@ -14,37 +14,8 @@ The full license is in the LICENSE file, distributed with this software.
1414
#include <Python.h>
1515
#include "tokenizer.h"
1616

17-
typedef struct _file_source {
18-
/* The file being read. */
19-
int fd;
20-
21-
char *buffer;
22-
size_t size;
23-
} file_source;
24-
2517
#define FS(source) ((file_source *)source)
2618

27-
#if !defined(_WIN32) && !defined(HAVE_MMAP)
28-
#define HAVE_MMAP
29-
#endif // HAVE_MMAP
30-
31-
typedef struct _memory_map {
32-
int fd;
33-
34-
/* Size of the file, in bytes. */
35-
char *memmap;
36-
size_t size;
37-
38-
size_t position;
39-
} memory_map;
40-
41-
void *new_mmap(char *fname);
42-
43-
int del_mmap(void *src);
44-
45-
void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
46-
int *status);
47-
4819
typedef struct _rd_source {
4920
PyObject *obj;
5021
PyObject *buffer;
@@ -53,16 +24,10 @@ typedef struct _rd_source {
5324

5425
#define RDS(source) ((rd_source *)source)
5526

56-
void *new_file_source(char *fname, size_t buffer_size);
57-
5827
void *new_rd_source(PyObject *obj);
5928

60-
int del_file_source(void *src);
6129
int del_rd_source(void *src);
6230

63-
void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
64-
int *status);
65-
6631
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
6732
int *status, const char *encoding_errors);
6833

0 commit comments

Comments
 (0)