Skip to content

Commit 80bddaf

Browse files
WillAyd authored and jreback committed
Clean Up src/parsers (#26445)
1 parent 7ee26a8 commit 80bddaf

File tree

4 files changed

+6
-78
lines changed

4 files changed

+6
-78
lines changed

pandas/_libs/parsers.pyx

-3
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,6 @@ cdef extern from "parser/tokenizer.h":
149149
int skipinitialspace # ignore spaces following delimiter? */
150150
int quoting # style of quoting to write */
151151

152-
# hmm =/
153-
# int numeric_field
154-
155152
char commentchar
156153
int allow_embedded_newline
157154
int strict # raise exception on bad CSV */

pandas/_libs/src/parser/io.h

+2 -3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ The full license is in the LICENSE file, distributed with this software.
1010
#ifndef PANDAS__LIBS_SRC_PARSER_IO_H_
1111
#define PANDAS__LIBS_SRC_PARSER_IO_H_
1212

13-
#include "Python.h"
13+
#define PY_SSIZE_T_CLEAN
14+
#include <Python.h>
1415
#include "tokenizer.h"
1516

1617
typedef struct _file_source {
@@ -37,8 +38,6 @@ typedef struct _memory_map {
3738
size_t position;
3839
} memory_map;
3940

40-
#define MM(src) ((memory_map *)src)
41-
4241
void *new_mmap(char *fname);
4342

4443
int del_mmap(void *src);

pandas/_libs/src/parser/tokenizer.c

-35
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,6 @@ void parser_set_default_options(parser_t *self) {
131131
self->skip_footer = 0;
132132
}
133133

134-
int get_parser_memory_footprint(parser_t *self) { return 0; }
135-
136134
parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); }
137135

138136
int parser_clear_data_buffers(parser_t *self) {
@@ -1426,21 +1424,6 @@ PANDAS_INLINE void uppercase(char *p) {
14261424
for (; *p; ++p) *p = toupper_ascii(*p);
14271425
}
14281426

1429-
int PANDAS_INLINE to_longlong(char *item, long long *p_value) {
1430-
char *p_end;
1431-
1432-
// Try integer conversion. We explicitly give the base to be 10. If
1433-
// we used 0, strtoll() would convert '012' to 10, because the leading 0 in
1434-
// '012' signals an octal number in C. For a general purpose reader, that
1435-
// would be a bug, not a feature.
1436-
*p_value = strtoll(item, &p_end, 10);
1437-
1438-
// Allow trailing spaces.
1439-
while (isspace_ascii(*p_end)) ++p_end;
1440-
1441-
return (errno == 0) && (!*p_end);
1442-
}
1443-
14441427
int to_boolean(const char *item, uint8_t *val) {
14451428
char *tmp;
14461429
int i, status = 0;
@@ -1474,24 +1457,6 @@ int to_boolean(const char *item, uint8_t *val) {
14741457
return status;
14751458
}
14761459

1477-
#ifdef TEST
1478-
1479-
int main(int argc, char *argv[]) {
1480-
double x, y;
1481-
long long xi;
1482-
int status;
1483-
char *s;
1484-
1485-
s = "123,789";
1486-
status = to_longlong_thousands(s, &xi, ',');
1487-
printf("s = '%s'\n", s);
1488-
printf("status = %d\n", status);
1489-
printf("x = %d\n", (int)xi);
1490-
1491-
return 0;
1492-
}
1493-
#endif // TEST
1494-
14951460
// ---------------------------------------------------------------------------
14961461
// Implementation of xstrtod
14971462

pandas/_libs/src/parser/tokenizer.h

+4 -37
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,8 @@ See LICENSE for the license
1212
#ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_
1313
#define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_
1414

15-
#include <errno.h>
16-
#include <stdio.h>
17-
#include <stdlib.h>
18-
#include <string.h>
19-
#include <time.h>
20-
#include "Python.h"
21-
22-
#include <ctype.h>
15+
#define PY_SSIZE_T_CLEAN
16+
#include <Python.h>
2317

2418
#define ERROR_OK 0
2519
#define ERROR_NO_DIGITS 1
@@ -31,9 +25,6 @@ See LICENSE for the license
3125

3226
#include "khash.h"
3327

34-
#define CHUNKSIZE 1024 * 256
35-
#define KB 1024
36-
#define MB 1024 * KB
3728
#define STREAM_INIT_SIZE 32
3829

3930
#define REACHED_EOF 1
@@ -50,25 +41,10 @@ See LICENSE for the license
5041
5142
*/
5243

53-
#define FALSE 0
54-
#define TRUE 1
55-
56-
// Maximum number of columns in a file.
57-
#define MAX_NUM_COLUMNS 2000
58-
59-
// Maximum number of characters in single field.
60-
#define FIELD_BUFFER_SIZE 2000
61-
6244
/*
6345
* Common set of error types for the read_rows() and tokenize()
6446
* functions.
6547
*/
66-
#define ERROR_OUT_OF_MEMORY 1
67-
#define ERROR_INVALID_COLUMN_INDEX 10
68-
#define ERROR_CHANGED_NUMBER_OF_FIELDS 12
69-
#define ERROR_TOO_MANY_CHARS 21
70-
#define ERROR_TOO_MANY_FIELDS 22
71-
#define ERROR_NO_DATA 23
7248

7349
// #define VERBOSE
7450
#if defined(VERBOSE)
@@ -84,12 +60,6 @@ See LICENSE for the license
8460
* of some file I/O.
8561
*/
8662

87-
/*
88-
* WORD_BUFFER_SIZE determines the maximum amount of non-delimiter
89-
* text in a row.
90-
*/
91-
#define WORD_BUFFER_SIZE 4000
92-
9363
typedef enum {
9464
START_RECORD,
9565
START_FIELD,
@@ -164,9 +134,6 @@ typedef struct parser_t {
164134
int skipinitialspace; /* ignore spaces following delimiter? */
165135
int quoting; /* style of quoting to write */
166136

167-
// krufty, hmm =/
168-
int numeric_field;
169-
170137
char commentchar;
171138
int allow_embedded_newline;
172139
int strict; /* raise exception on bad CSV */
@@ -191,7 +158,7 @@ typedef struct parser_t {
191158
void *skipset;
192159
PyObject *skipfunc;
193160
int64_t skip_first_N_rows;
194-
int skip_footer;
161+
int64_t skip_footer;
195162
// pick one, depending on whether the converter requires GIL
196163
double (*double_converter_nogil)(const char *, char **,
197164
char, char, char, int);
@@ -208,7 +175,7 @@ typedef struct parser_t {
208175
typedef struct coliter_t {
209176
char **words;
210177
int64_t *line_start;
211-
int col;
178+
int64_t col;
212179
} coliter_t;
213180

214181
void coliter_setup(coliter_t *self, parser_t *parser, int i, int start);

0 commit comments

Comments
 (0)