diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 0772cadf6e737..e860d59f2e5bd 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -88,6 +88,7 @@ Other enhancements - Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`) - Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`) - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`) +- Performance improvement in :func:`read_csv` with ``engine="c"`` (:issue:`52632`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index fed9b26d479cb..e60fc6bf75f91 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -664,9 +664,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes, ((!self->delim_whitespace && c == ' ' && self->skipinitialspace)) // applied when in a field -#define IS_DELIMITER(c) \ -  ((!self->delim_whitespace && c == self->delimiter) || \ -  (self->delim_whitespace && isblank(c))) +#define IS_DELIMITER(c) ((c == delimiter) || (delim_whitespace && isblank(c))) #define _TOKEN_CLEANUP() \ self->stream_len = slen; \ @@ -721,6 +719,9 @@ int tokenize_bytes(parser_t *self, const char lineterminator = (self->lineterminator == '\0') ? '\n' : self->lineterminator; + const int delim_whitespace = self->delim_whitespace; + const char delimiter = self->delimiter; + // 1000 is something that couldn't fit in "char" // thus comparing a char to it would always be "false" const int carriage_symbol = (self->lineterminator == '\0') ? '\r' : 1000;