diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2eb19ef1dd082..88b918e9cc515 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -149,9 +149,6 @@ cdef extern from "parser/tokenizer.h": int skipinitialspace # ignore spaces following delimiter? */ int quoting # style of quoting to write */ - # hmm =/ - # int numeric_field - char commentchar int allow_embedded_newline int strict # raise exception on bad CSV */ diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h index 074322c7bdf78..aac418457d3b6 100644 --- a/pandas/_libs/src/parser/io.h +++ b/pandas/_libs/src/parser/io.h @@ -10,7 +10,8 @@ The full license is in the LICENSE file, distributed with this software. #ifndef PANDAS__LIBS_SRC_PARSER_IO_H_ #define PANDAS__LIBS_SRC_PARSER_IO_H_ -#include "Python.h" +#define PY_SSIZE_T_CLEAN +#include #include "tokenizer.h" typedef struct _file_source { @@ -37,8 +38,6 @@ typedef struct _memory_map { size_t position; } memory_map; -#define MM(src) ((memory_map *)src) - void *new_mmap(char *fname); int del_mmap(void *src); diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index fd5fc0df299ae..723bf56a79512 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -131,8 +131,6 @@ void parser_set_default_options(parser_t *self) { self->skip_footer = 0; } -int get_parser_memory_footprint(parser_t *self) { return 0; } - parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); } int parser_clear_data_buffers(parser_t *self) { @@ -1426,21 +1424,6 @@ PANDAS_INLINE void uppercase(char *p) { for (; *p; ++p) *p = toupper_ascii(*p); } -int PANDAS_INLINE to_longlong(char *item, long long *p_value) { - char *p_end; - - // Try integer conversion. We explicitly give the base to be 10. If - // we used 0, strtoll() would convert '012' to 10, because the leading 0 in - // '012' signals an octal number in C. For a general purpose reader, that - // would be a bug, not a feature. - *p_value = strtoll(item, &p_end, 10); - - // Allow trailing spaces. - while (isspace_ascii(*p_end)) ++p_end; - - return (errno == 0) && (!*p_end); -} - int to_boolean(const char *item, uint8_t *val) { char *tmp; int i, status = 0; @@ -1474,24 +1457,6 @@ int to_boolean(const char *item, uint8_t *val) { return status; } -#ifdef TEST - -int main(int argc, char *argv[]) { - double x, y; - long long xi; - int status; - char *s; - - s = "123,789"; - status = to_longlong_thousands(s, &xi, ','); - printf("s = '%s'\n", s); - printf("status = %d\n", status); - printf("x = %d\n", (int)xi); - - return 0; -} -#endif // TEST - // --------------------------------------------------------------------------- // Implementation of xstrtod diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h index 6cad4c932cb07..b6d5d6937f4db 100644 --- a/pandas/_libs/src/parser/tokenizer.h +++ b/pandas/_libs/src/parser/tokenizer.h @@ -12,14 +12,8 @@ See LICENSE for the license #ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ #define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ -#include -#include -#include -#include -#include -#include "Python.h" - -#include +#define PY_SSIZE_T_CLEAN +#include #define ERROR_OK 0 #define ERROR_NO_DIGITS 1 @@ -31,9 +25,6 @@ See LICENSE for the license #include "khash.h" -#define CHUNKSIZE 1024 * 256 -#define KB 1024 -#define MB 1024 * KB #define STREAM_INIT_SIZE 32 #define REACHED_EOF 1 @@ -50,25 +41,10 @@ See LICENSE for the license */ -#define FALSE 0 -#define TRUE 1 - -// Maximum number of columns in a file. -#define MAX_NUM_COLUMNS 2000 - -// Maximum number of characters in single field. -#define FIELD_BUFFER_SIZE 2000 - /* * Common set of error types for the read_rows() and tokenize() * functions. */ -#define ERROR_OUT_OF_MEMORY 1 -#define ERROR_INVALID_COLUMN_INDEX 10 -#define ERROR_CHANGED_NUMBER_OF_FIELDS 12 -#define ERROR_TOO_MANY_CHARS 21 -#define ERROR_TOO_MANY_FIELDS 22 -#define ERROR_NO_DATA 23 // #define VERBOSE #if defined(VERBOSE) @@ -84,12 +60,6 @@ See LICENSE for the license * of some file I/O. */ -/* - * WORD_BUFFER_SIZE determines the maximum amount of non-delimiter - * text in a row. - */ -#define WORD_BUFFER_SIZE 4000 - typedef enum { START_RECORD, START_FIELD, @@ -164,9 +134,6 @@ typedef struct parser_t { int skipinitialspace; /* ignore spaces following delimiter? */ int quoting; /* style of quoting to write */ - // krufty, hmm =/ - int numeric_field; - char commentchar; int allow_embedded_newline; int strict; /* raise exception on bad CSV */ @@ -191,7 +158,7 @@ typedef struct parser_t { void *skipset; PyObject *skipfunc; int64_t skip_first_N_rows; - int skip_footer; + int64_t skip_footer; // pick one, depending on whether the converter requires GIL double (*double_converter_nogil)(const char *, char **, char, char, char, int); @@ -208,7 +175,7 @@ typedef struct parser_t { typedef struct coliter_t { char **words; int64_t *line_start; - int col; + int64_t col; } coliter_t; void coliter_setup(coliter_t *self, parser_t *parser, int i, int start);