From 3de23319e0116170bf55c172419977c71af22933 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sun, 25 Dec 2022 10:21:40 +0000 Subject: [PATCH 01/11] fixup --- .pre-commit-config.yaml | 2 +- pandas/_libs/tslib.pyx | 3 +- pandas/_libs/tslibs/np_datetime.pxd | 8 +- pandas/_libs/tslibs/np_datetime.pyx | 6 +- .../tslibs/src/datetime/np_datetime_strings.c | 76 +++++++++++++++---- .../tslibs/src/datetime/np_datetime_strings.h | 18 ++++- pandas/tests/tools/test_to_datetime.py | 31 ++++++++ 7 files changed, 123 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1c3dd35ef47f5..01ea514ef18b7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,7 +63,7 @@ repos: '--extensions=c,h', '--headers=h', --recursive, - '--filter=-readability/casting,-runtime/int,-build/include_subdir' + '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' ] - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c1a30e03235b5..87b81d186b91b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -28,6 +28,7 @@ cnp.import_array() from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + Exact, NPY_FR_ns, check_dts_bounds, get_datetime64_value, @@ -411,7 +412,7 @@ cpdef array_to_datetime( bint utc=False, bint require_iso8601=False, format: str | None=None, - bint exact=True, + Exact exact=Exact.EXACT_MATCH, ): """ Converts a 1D array of date-like values to a numpy array of either: diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index de81c611c9ee9..c40702d09ecd3 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -96,7 +96,7 @@ cdef int string_to_dts( int* out_tzoffset, bint want_exc, format: str | None = *, - bint exact = * + Exact exact = * ) except? -1 cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype) @@ -120,3 +120,9 @@ cdef int64_t convert_reso( NPY_DATETIMEUNIT to_reso, bint round_ok, ) except? -1 + +cdef extern from "src/datetime/np_datetime_strings.h": + cdef enum Exact: + PARTIAL_MATCH + EXACT_MATCH + NO_MATCH diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 9db3f7cb4648e..fee3da31a244f 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -53,7 +53,7 @@ cdef extern from "src/datetime/np_datetime_strings.h": npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, - const char *format, int format_len, int exact) + const char *format, int format_len, Exact exact) # ---------------------------------------------------------------------- @@ -279,7 +279,7 @@ cdef int string_to_dts( int* out_tzoffset, bint want_exc, format: str | None=None, - bint exact=True, + Exact exact=EXACT_MATCH, ) except? -1: cdef: Py_ssize_t length @@ -291,7 +291,7 @@ cdef int string_to_dts( if format is None: format_buf = b"" format_length = 0 - exact = False + exact = NO_MATCH else: format_buf = get_c_string_buf_and_size(format, &format_length) return parse_iso_8601_datetime(buf, length, want_exc, diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index 7bb94012fad0c..b91f64855b52d 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -71,20 +71,13 @@ This file implements string parsing and creation for NumPy datetime. // and decrement characters_remaining by n on success // On failure will return -1 without incrementing static int compare_format(const char **format, int *characters_remaining, - const char *compare_to, int n, const int exact) { + const char *compare_to, int n, const enum Exact exact) { + if (exact == NO_MATCH) { + return 0; + } if (*characters_remaining < n) { - if (exact) { - // TODO(pandas-dev): in the future we should set a PyErr here - // to be very clear about what went wrong - return -1; - } else if (*characters_remaining) { - // TODO(pandas-dev): same return value in this function as - // above branch, but stub out a future where - // we have a better error message - return -1; - } else { - return 0; - } + // TODO(pandas-dev): PyErr to differentiate what went wrong + return -1; } else { if (strncmp(*format, compare_to, n)) { // TODO(pandas-dev): PyErr to differentiate what went wrong @@ -102,7 +95,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, - const char* format, int format_len, int exact) { + const char* format, int format_len, + enum Exact exact) { if (len < 0 || format_len < 0) goto parse_error; int year_leap = 0; @@ -139,6 +133,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, " ", 1, exact)) { goto parse_error; } @@ -155,6 +152,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE YEAR (4 digits) */ + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%Y", 2, exact)) { goto parse_error; } @@ -202,6 +202,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, ++substr; --sublen; + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) { goto parse_error; } @@ -212,6 +215,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MONTH */ + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%m", 2, exact)) { goto parse_error; } @@ -258,12 +264,18 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } ++substr; --sublen; + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) { goto parse_error; } } /* PARSE THE DAY */ + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%d", 2, exact)) { goto parse_error; } @@ -306,6 +318,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if ((*substr != 'T' && *substr != ' ') || sublen == 1) { goto parse_error; } + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, substr, 1, exact)) { goto parse_error; } @@ -313,6 +328,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, --sublen; /* PARSE THE HOURS */ + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%H", 2, exact)) { goto parse_error; } @@ -359,6 +377,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen == 0 || !isdigit(*substr)) { goto parse_error; } + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, ":", 1, exact)) { goto parse_error; } @@ -370,6 +391,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MINUTES */ + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%M", 2, exact)) { goto parse_error; } @@ -405,6 +429,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* If we make it through this condition block, then the next * character is a digit. */ if (has_hms_sep && *substr == ':') { + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, ":", 1, exact)) { goto parse_error; } @@ -420,6 +447,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE SECONDS */ + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%S", 2, exact)) { goto parse_error; } @@ -448,6 +478,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen > 0 && *substr == '.') { ++substr; --sublen; + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, ".", 1, exact)) { goto parse_error; } @@ -457,6 +490,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MICROSECONDS (0 to 6 digits) */ + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%f", 2, exact)) { goto parse_error; } @@ -524,6 +560,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, " ", 1, exact)) { goto parse_error; } @@ -539,6 +578,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* UTC specifier */ if (*substr == 'Z') { + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%z", 2, exact)) { goto parse_error; } @@ -561,6 +603,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, --sublen; } } else if (*substr == '-' || *substr == '+') { + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, "%z", 2, exact)) { goto parse_error; } @@ -647,6 +692,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; + if (exact == PARTIAL_MATCH && !format_len) { + goto finish; + } if (compare_format(&format, &format_len, " ", 1, exact)) { goto parse_error; } diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h index 734f7daceba05..d325adf2c930c 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -26,6 +26,22 @@ This file implements string parsing and creation for NumPy datetime. #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #endif // NPY_NO_DEPRECATED_API +/* 'exact' can be one of three values: + * * PARTIAL_MATCH : Only require a partial match with 'format'. + * For example, if the string is '2020-01-01 05:00:00' and + * 'format' is '%Y-%m-%d', then parse '2020-01-01'; + * * EXACT_MATCH : require an exact match with 'format'. If the + * string is '2020-01-01', then the only format which will + * be able to parse it without error is '%Y-%m-%d'; + * * NO_MATCH: don't require any match - parse without comparing + * with 'format'. + */ +enum Exact { + PARTIAL_MATCH, + EXACT_MATCH, + NO_MATCH +}; + /* * Parses (almost) standard ISO 8601 date strings. The differences are: * @@ -61,7 +77,7 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc, int *out_tzoffset, const char* format, int format_len, - int exact); + enum Exact exact); /* * Provides a string length to use for converting datetime diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 83e40f5f1d98b..8bb26650d09bd 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -353,6 +353,37 @@ def test_to_datetime_with_non_exact(self, cache): ) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "format, expected", + [ + ("%Y-%m-%d", Timestamp(2000, 1, 3)), + ("%Y-%d-%m", Timestamp(2000, 3, 1)), + ("%Y-%m-%d %H", Timestamp(2000, 1, 3, 12)), + ("%Y-%d-%m %H", Timestamp(2000, 3, 1, 12)), + ("%Y-%m-%d %H:%M", Timestamp(2000, 1, 3, 12, 34)), + ("%Y-%d-%m %H:%M", Timestamp(2000, 3, 1, 12, 34)), + ("%Y-%m-%d %H:%M:%S", Timestamp(2000, 1, 3, 12, 34, 56)), + ("%Y-%d-%m %H:%M:%S", Timestamp(2000, 3, 1, 12, 34, 56)), + ("%Y-%m-%d %H:%M:%S.%f", Timestamp(2000, 1, 3, 12, 34, 56, 123456)), + ("%Y-%d-%m %H:%M:%S.%f", Timestamp(2000, 3, 1, 12, 34, 56, 123456)), + ( + "%Y-%m-%d %H:%M:%S.%f%z", + Timestamp(2000, 1, 3, 12, 34, 56, 123456, tz="UTC+01:00"), + ), + ( + "%Y-%d-%m %H:%M:%S.%f%z", + Timestamp(2000, 3, 1, 12, 34, 56, 123456, tz="UTC+01:00"), + ), + ], + ) + def test_non_exact_doesnt_parse_whole_string(self, cache, format, expected): + # https://github.com/pandas-dev/pandas/issues/50412 + # the formats alternate between ISO8601 and non-ISO8601 to check both paths + result = to_datetime( + "2000-01-03 12:34:56.123456+01:00", format=format, exact=False + ) + assert result == expected + @pytest.mark.parametrize( "arg", [ From e3fe55bdd04818b6c5c5eedc8ebae74f9fe632e5 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 28 Dec 2022 10:51:59 +0000 Subject: [PATCH 02/11] use enum --- .../tslibs/src/datetime/np_datetime_strings.c | 169 ++++++++++-------- 1 file changed, 94 insertions(+), 75 deletions(-) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index b91f64855b52d..a03becd922c85 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -67,28 +67,43 @@ This file implements string parsing and creation for NumPy datetime. * Returns 0 on success, -1 on failure. */ +enum Comparison { + ADVANCE, + RETURN, + ERROR +}; // This function will advance the pointer on format // and decrement characters_remaining by n on success -// On failure will return -1 without incrementing -static int compare_format(const char **format, int *characters_remaining, - const char *compare_to, int n, const enum Exact exact) { +// On failure will return ERROR without incrementing +// If `exact` is PARTIAL_MATCH, and the `format` string has +// been exhausted, then signal to the caller to finish parsing. +static enum Comparison compare_format( + const char **format, + int *characters_remaining, + const char *compare_to, + int n, + const enum Exact exact +) { + if (exact == PARTIAL_MATCH && !*characters_remaining) { + return RETURN; + } if (exact == NO_MATCH) { - return 0; + return ADVANCE; } if (*characters_remaining < n) { // TODO(pandas-dev): PyErr to differentiate what went wrong - return -1; + return ERROR; } else { if (strncmp(*format, compare_to, n)) { // TODO(pandas-dev): PyErr to differentiate what went wrong - return -1; + return ERROR; } else { *format += n; *characters_remaining -= n; - return 0; + return ADVANCE; } } - return 0; + return ADVANCE; } int parse_iso_8601_datetime(const char *str, int len, int want_exc, @@ -104,6 +119,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, const char *substr; int sublen; NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; + enum Comparison comparison; /* If year-month-day are separated by a valid separator, * months/days without leading zeroes will be parsed @@ -133,11 +149,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, " ", 1, exact)) { + comparison = compare_format(&format, &format_len, " ", 1, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } } @@ -155,8 +171,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (exact == PARTIAL_MATCH && !format_len) { goto finish; } - if (compare_format(&format, &format_len, "%Y", 2, exact)) { + comparison = compare_format(&format, &format_len, "%Y", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } out->year = 0; @@ -202,11 +221,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, ++substr; --sublen; - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) { + comparison = compare_format(&format, &format_len, &ymd_sep, 1, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* Cannot have trailing separator */ if (sublen == 0 || !isdigit(*substr)) { @@ -215,11 +234,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MONTH */ - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%m", 2, exact)) { + comparison = compare_format(&format, &format_len, "%m", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* First digit required */ out->month = (*substr - '0'); @@ -264,20 +283,20 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } ++substr; --sublen; - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) { + comparison = compare_format(&format, &format_len, &ymd_sep, 1, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } } /* PARSE THE DAY */ - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%d", 2, exact)) { + comparison = compare_format(&format, &format_len, "%d", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* First digit required */ if (!isdigit(*substr)) { @@ -318,21 +337,21 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if ((*substr != 'T' && *substr != ' ') || sublen == 1) { goto parse_error; } - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, substr, 1, exact)) { - goto parse_error; - } + comparison = compare_format(&format, &format_len, substr, 1, exact); + if (comparison == ERROR) { + goto parse_error; + } else if (comparison == RETURN) { + goto finish; + } ++substr; --sublen; /* PARSE THE HOURS */ - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%H", 2, exact)) { + comparison = compare_format(&format, &format_len, "%H", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* First digit required */ if (!isdigit(*substr)) { @@ -377,11 +396,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen == 0 || !isdigit(*substr)) { goto parse_error; } - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, ":", 1, exact)) { + comparison = compare_format(&format, &format_len, ":", 1, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } } else if (!isdigit(*substr)) { if (!hour_was_2_digits) { @@ -391,11 +410,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MINUTES */ - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%M", 2, exact)) { + comparison = compare_format(&format, &format_len, "%M", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* First digit required */ out->min = (*substr - '0'); @@ -429,11 +448,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* If we make it through this condition block, then the next * character is a digit. */ if (has_hms_sep && *substr == ':') { - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, ":", 1, exact)) { + comparison = compare_format(&format, &format_len, ":", 1, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } ++substr; --sublen; @@ -447,11 +466,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE SECONDS */ - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%S", 2, exact)) { + comparison = compare_format(&format, &format_len, "%S", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* First digit required */ out->sec = (*substr - '0'); @@ -478,11 +497,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen > 0 && *substr == '.') { ++substr; --sublen; - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, ".", 1, exact)) { + comparison = compare_format(&format, &format_len, ".", 1, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } } else { bestunit = NPY_FR_s; @@ -490,11 +509,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MICROSECONDS (0 to 6 digits) */ - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%f", 2, exact)) { + comparison = compare_format(&format, &format_len, "%f", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } numdigits = 0; for (i = 0; i < 6; ++i) { @@ -560,11 +579,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, " ", 1, exact)) { + comparison = compare_format(&format, &format_len, " ", 1, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } } @@ -578,11 +597,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* UTC specifier */ if (*substr == 'Z') { - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%z", 2, exact)) { + comparison = compare_format(&format, &format_len, "%z", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* "Z" should be equivalent to tz offset "+00:00" */ if (out_local != NULL) { @@ -603,11 +622,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, --sublen; } } else if (*substr == '-' || *substr == '+') { - if (exact == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, "%z", 2, exact)) { + comparison = compare_format(&format, &format_len, "%z", 2, exact); + if (comparison == ERROR) { goto parse_error; + } else if (comparison == RETURN) { + goto finish; } /* Time zone offset */ int offset_neg = 0, offset_hour = 0, offset_minute = 0; From efeaf7aba732e0426312e95b865488068cf60d02 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 09:52:24 +0000 Subject: [PATCH 03/11] more descriptive names --- .pre-commit-config.yaml | 1 + pandas/_libs/tslib.pyx | 3 +- pandas/_libs/tslibs/np_datetime.pxd | 6 +- pandas/_libs/tslibs/np_datetime.pyx | 12 +- .../tslibs/src/datetime/np_datetime_strings.c | 148 +++++++++--------- .../tslibs/src/datetime/np_datetime_strings.h | 9 +- 6 files changed, 92 insertions(+), 87 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 909ca86bc373d..f3158e64df8dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,6 +63,7 @@ repos: '--extensions=c,h', '--headers=h', --recursive, + --linelength=88, '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' ] - repo: https://github.com/PyCQA/flake8 diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b0307fed97f07..976a53e9117de 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -28,7 +28,6 @@ cnp.import_array() from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, - Exact, NPY_FR_ns, check_dts_bounds, get_datetime64_value, @@ -404,7 +403,7 @@ cpdef array_to_datetime( bint utc=False, bint require_iso8601=False, format: str | None=None, - Exact exact=Exact.EXACT_MATCH, + bint exact=True, ): """ Converts a 1D array of date-like values to a numpy array of either: diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index c40702d09ecd3..89198e7ac3798 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -96,7 +96,7 @@ cdef int string_to_dts( int* out_tzoffset, bint want_exc, format: str | None = *, - Exact exact = * + bint exact = * ) except? -1 cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype) @@ -122,7 +122,7 @@ cdef int64_t convert_reso( ) except? -1 cdef extern from "src/datetime/np_datetime_strings.h": - cdef enum Exact: + cdef enum FormatRequirement: PARTIAL_MATCH EXACT_MATCH - NO_MATCH + INFER_FORMAT diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index fee3da31a244f..7f54f2c5a6a31 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -53,7 +53,8 @@ cdef extern from "src/datetime/np_datetime_strings.h": npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, - const char *format, int format_len, Exact exact) + const char *format, int format_len, + FormatRequirement exact) # ---------------------------------------------------------------------- @@ -279,24 +280,27 @@ cdef int string_to_dts( int* out_tzoffset, bint want_exc, format: str | None=None, - Exact exact=EXACT_MATCH, + bint exact=EXACT_MATCH, ) except? -1: cdef: Py_ssize_t length const char* buf Py_ssize_t format_length const char* format_buf + FormatRequirement format_requirement buf = get_c_string_buf_and_size(val, &length) if format is None: format_buf = b"" format_length = 0 - exact = NO_MATCH + format_requirement = INFER_FORMAT else: format_buf = get_c_string_buf_and_size(format, &format_length) + format_requirement = int(exact) return parse_iso_8601_datetime(buf, length, want_exc, dts, out_bestunit, out_local, out_tzoffset, - format_buf, format_length, exact) + format_buf, format_length, + format_requirement) cpdef ndarray astype_overflowsafe( diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index a03becd922c85..c41ed6646bf73 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -67,43 +67,43 @@ This file implements string parsing and creation for NumPy datetime. * Returns 0 on success, -1 on failure. */ -enum Comparison { - ADVANCE, - RETURN, - ERROR +enum DatetimePartParseResult { + COMPARISON_SUCCESS, + COMPLETED_PARTIAL_MATCH, + COMPARISON_ERROR }; // This function will advance the pointer on format // and decrement characters_remaining by n on success -// On failure will return ERROR without incrementing -// If `exact` is PARTIAL_MATCH, and the `format` string has -// been exhausted, then signal to the caller to finish parsing. -static enum Comparison compare_format( +// On failure will return COMPARISON_ERROR without incrementing +// If `format_requirement` is PARTIAL_MATCH, and the `format` string has +// been exhausted, then return COMPLETED_PARTIAL_MATCH. +static enum DatetimePartParseResult compare_format( const char **format, int *characters_remaining, const char *compare_to, int n, - const enum Exact exact + const enum DatetimeFormatRequirement format_requirement ) { - if (exact == PARTIAL_MATCH && !*characters_remaining) { - return RETURN; + if (format_requirement == PARTIAL_MATCH && !*characters_remaining) { + return COMPLETED_PARTIAL_MATCH; } - if (exact == NO_MATCH) { - return ADVANCE; + if (format_requirement == INFER_FORMAT) { + return COMPARISON_SUCCESS; } if (*characters_remaining < n) { // TODO(pandas-dev): PyErr to differentiate what went wrong - return ERROR; + return COMPARISON_ERROR; } else { if (strncmp(*format, compare_to, n)) { // TODO(pandas-dev): PyErr to differentiate what went wrong - return ERROR; + return COMPARISON_ERROR; } else { *format += n; *characters_remaining -= n; - return ADVANCE; + return COMPARISON_SUCCESS; } } - return ADVANCE; + return COMPARISON_SUCCESS; } int parse_iso_8601_datetime(const char *str, int len, int want_exc, @@ -111,7 +111,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, const char* format, int format_len, - enum Exact exact) { + enum DatetimeFormatRequirement format_requirement) { if (len < 0 || format_len < 0) goto parse_error; int year_leap = 0; @@ -119,7 +119,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, const char *substr; int sublen; NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; - enum Comparison comparison; + enum DatetimePartParseResult comparison; /* If year-month-day are separated by a valid separator, * months/days without leading zeroes will be parsed @@ -149,10 +149,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - comparison = compare_format(&format, &format_len, " ", 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } } @@ -168,13 +168,13 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE YEAR (4 digits) */ - if (exact == PARTIAL_MATCH && !format_len) { + if (format_requirement == PARTIAL_MATCH && !format_len) { goto finish; } - comparison = compare_format(&format, &format_len, "%Y", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } @@ -221,10 +221,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, ++substr; --sublen; - comparison = compare_format(&format, &format_len, &ymd_sep, 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* Cannot have trailing separator */ @@ -234,10 +235,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MONTH */ - comparison = compare_format(&format, &format_len, "%m", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* First digit required */ @@ -283,19 +284,20 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } ++substr; --sublen; - comparison = compare_format(&format, &format_len, &ymd_sep, 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } } /* PARSE THE DAY */ - comparison = compare_format(&format, &format_len, "%d", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* First digit required */ @@ -337,20 +339,20 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if ((*substr != 'T' && *substr != ' ') || sublen == 1) { goto parse_error; } - comparison = compare_format(&format, &format_len, substr, 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, substr, 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } ++substr; --sublen; /* PARSE THE HOURS */ - comparison = compare_format(&format, &format_len, "%H", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* First digit required */ @@ -396,10 +398,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen == 0 || !isdigit(*substr)) { goto parse_error; } - comparison = compare_format(&format, &format_len, ":", 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } } else if (!isdigit(*substr)) { @@ -410,10 +412,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MINUTES */ - comparison = compare_format(&format, &format_len, "%M", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* First digit required */ @@ -448,10 +450,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* If we make it through this condition block, then the next * character is a digit. */ if (has_hms_sep && *substr == ':') { - comparison = compare_format(&format, &format_len, ":", 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } ++substr; @@ -466,10 +468,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE SECONDS */ - comparison = compare_format(&format, &format_len, "%S", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* First digit required */ @@ -497,10 +499,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen > 0 && *substr == '.') { ++substr; --sublen; - comparison = compare_format(&format, &format_len, ".", 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, ".", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } } else { @@ -509,10 +511,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MICROSECONDS (0 to 6 digits) */ - comparison = compare_format(&format, &format_len, "%f", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } numdigits = 0; @@ -579,10 +581,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - comparison = compare_format(&format, &format_len, " ", 1, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } } @@ -597,10 +599,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* UTC specifier */ if (*substr == 'Z') { - comparison = compare_format(&format, &format_len, "%z", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* "Z" should be equivalent to tz offset "+00:00" */ @@ -622,10 +624,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, --sublen; } } else if (*substr == '-' || *substr == '+') { - comparison = compare_format(&format, &format_len, "%z", 2, exact); - if (comparison == ERROR) { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; - } else if (comparison == RETURN) { + } else if (comparison == COMPLETED_PARTIAL_MATCH) { goto finish; } /* Time zone offset */ @@ -711,10 +713,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - if (exact == PARTIAL_MATCH && !format_len) { + if (format_requirement == PARTIAL_MATCH && !format_len) { goto finish; } - if (compare_format(&format, &format_len, " ", 1, exact)) { + if (compare_format(&format, &format_len, " ", 1, format_requirement)) { goto parse_error; } } diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h index d325adf2c930c..04439f9f3f81b 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -33,13 +33,12 @@ This file implements string parsing and creation for NumPy datetime. * * EXACT_MATCH : require an exact match with 'format'. If the * string is '2020-01-01', then the only format which will * be able to parse it without error is '%Y-%m-%d'; - * * NO_MATCH: don't require any match - parse without comparing - * with 'format'. + * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). */ -enum Exact { +enum FormatRequirement { PARTIAL_MATCH, EXACT_MATCH, - NO_MATCH + INFER_FORMAT }; /* @@ -77,7 +76,7 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc, int *out_tzoffset, const char* format, int format_len, - enum Exact exact); + enum FormatRequirement format_requirement); /* * Provides a string length to use for converting datetime From 6c519249c8329e49fa1889619a9b82583bbe5af4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 10:04:23 +0000 Subject: [PATCH 04/11] renaming fixup --- pandas/_libs/tslibs/src/datetime/np_datetime_strings.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index c41ed6646bf73..5971b642d775a 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -82,7 +82,7 @@ static enum DatetimePartParseResult compare_format( int *characters_remaining, const char *compare_to, int n, - const enum DatetimeFormatRequirement format_requirement + const enum FormatRequirement format_requirement ) { if (format_requirement == PARTIAL_MATCH && !*characters_remaining) { return COMPLETED_PARTIAL_MATCH; @@ -111,7 +111,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, const char* format, int format_len, - enum DatetimeFormatRequirement format_requirement) { + enum FormatRequirement format_requirement) { if (len < 0 || format_len < 0) goto parse_error; int year_leap = 0; From b96158ff031d9807a05590e678de55b271f36c46 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 10:50:47 +0000 Subject: [PATCH 05/11] cast --- pandas/_libs/tslibs/np_datetime.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 7f54f2c5a6a31..e4ab4de1b8ee6 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -296,7 +296,7 @@ cdef int string_to_dts( format_requirement = INFER_FORMAT else: format_buf = get_c_string_buf_and_size(format, &format_length) - format_requirement = int(exact) + format_requirement = exact return parse_iso_8601_datetime(buf, length, want_exc, dts, out_bestunit, out_local, out_tzoffset, format_buf, format_length, From 0ebff5c09f1e18fe295c27e54ca244bf2b98ca47 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 10:55:40 +0000 Subject: [PATCH 06/11] clean up --- .../_libs/tslibs/src/datetime/np_datetime_strings.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index 5971b642d775a..f6ba329966dae 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -168,9 +168,6 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE YEAR (4 digits) */ - if (format_requirement == PARTIAL_MATCH && !format_len) { - goto finish; - } comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); if (comparison == COMPARISON_ERROR) { goto parse_error; @@ -713,11 +710,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - if (format_requirement == PARTIAL_MATCH && !format_len) { - goto finish; - } - if (compare_format(&format, &format_len, " ", 1, format_requirement)) { + comparison = compare_format(&format, &format_len, " ", 1, format_requirement) + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } } From caa9c905bc63f87bb4857a9edfd2dc20c0053e42 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 10:59:12 +0000 Subject: [PATCH 07/11] doc --- pandas/_libs/tslibs/src/datetime/np_datetime_strings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h index 04439f9f3f81b..727cdee977e38 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -26,7 +26,7 @@ This file implements string parsing and creation for NumPy datetime. #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #endif // NPY_NO_DEPRECATED_API -/* 'exact' can be one of three values: +/* 'format_requirement' can be one of three values: * * PARTIAL_MATCH : Only require a partial match with 'format'. * For example, if the string is '2020-01-01 05:00:00' and * 'format' is '%Y-%m-%d', then parse '2020-01-01'; From 84eeb3d6920ddb5c207d4f092ab7d7f954a3ebd6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 11:00:27 +0000 Subject: [PATCH 08/11] correct syntax --- pandas/_libs/tslibs/src/datetime/np_datetime_strings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index f6ba329966dae..a8c126539051e 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -710,7 +710,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - comparison = compare_format(&format, &format_len, " ", 1, format_requirement) + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); if (comparison == COMPARISON_ERROR) { goto parse_error; } else if (comparison == COMPLETED_PARTIAL_MATCH) { From 5c67ed308646fbe5d89ce686ab2f29a0719f4dce Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 17:27:19 +0000 Subject: [PATCH 09/11] use typedef --- pandas/_libs/tslibs/np_datetime.pxd | 2 +- .../_libs/tslibs/src/datetime/np_datetime_strings.c | 12 ++++++------ .../_libs/tslibs/src/datetime/np_datetime_strings.h | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 89198e7ac3798..492f45af09e80 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -122,7 +122,7 @@ cdef int64_t convert_reso( ) except? -1 cdef extern from "src/datetime/np_datetime_strings.h": - cdef enum FormatRequirement: + ctypedef enum FormatRequirement: PARTIAL_MATCH EXACT_MATCH INFER_FORMAT diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index a8c126539051e..9451be41aca9d 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -67,22 +67,22 @@ This file implements string parsing and creation for NumPy datetime. * Returns 0 on success, -1 on failure. */ -enum DatetimePartParseResult { +typedef enum { COMPARISON_SUCCESS, COMPLETED_PARTIAL_MATCH, COMPARISON_ERROR -}; +} DatetimePartParseResult; // This function will advance the pointer on format // and decrement characters_remaining by n on success // On failure will return COMPARISON_ERROR without incrementing // If `format_requirement` is PARTIAL_MATCH, and the `format` string has // been exhausted, then return COMPLETED_PARTIAL_MATCH. -static enum DatetimePartParseResult compare_format( +static DatetimePartParseResult compare_format( const char **format, int *characters_remaining, const char *compare_to, int n, - const enum FormatRequirement format_requirement + const FormatRequirement format_requirement ) { if (format_requirement == PARTIAL_MATCH && !*characters_remaining) { return COMPLETED_PARTIAL_MATCH; @@ -111,7 +111,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, const char* format, int format_len, - enum FormatRequirement format_requirement) { + FormatRequirement format_requirement) { if (len < 0 || format_len < 0) goto parse_error; int year_leap = 0; @@ -119,7 +119,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, const char *substr; int sublen; NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; - enum DatetimePartParseResult comparison; + DatetimePartParseResult comparison; /* If year-month-day are separated by a valid separator, * months/days without leading zeroes will be parsed diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h index 727cdee977e38..a635192d70809 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -35,11 +35,11 @@ This file implements string parsing and creation for NumPy datetime. * be able to parse it without error is '%Y-%m-%d'; * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). */ -enum FormatRequirement { +typedef enum { PARTIAL_MATCH, EXACT_MATCH, INFER_FORMAT -}; +} FormatRequirement; /* * Parses (almost) standard ISO 8601 date strings. The differences are: @@ -76,7 +76,7 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc, int *out_tzoffset, const char* format, int format_len, - enum FormatRequirement format_requirement); + FormatRequirement format_requirement); /* * Provides a string length to use for converting datetime From bad704ebf84ed8c313ab339c299aeb26e710c30b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 29 Dec 2022 18:30:59 +0000 Subject: [PATCH 10/11] check for negative characters remaining --- pandas/_libs/tslibs/src/datetime/np_datetime_strings.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index 9451be41aca9d..f1f03e6467eac 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -84,12 +84,15 @@ static DatetimePartParseResult compare_format( int n, const FormatRequirement format_requirement ) { - if (format_requirement == PARTIAL_MATCH && !*characters_remaining) { - return COMPLETED_PARTIAL_MATCH; - } if (format_requirement == INFER_FORMAT) { return COMPARISON_SUCCESS; } + if (*characters_remaining < 0) { + return COMPARISON_ERROR; + } + if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { + return COMPLETED_PARTIAL_MATCH; + } if (*characters_remaining < n) { // TODO(pandas-dev): PyErr to differentiate what went wrong return COMPARISON_ERROR; From 8d8f90edf92c79889ef5dc2ab625ee346ca16aa8 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Fri, 30 Dec 2022 15:04:49 +0000 Subject: [PATCH 11/11] reduce diff --- pandas/_libs/tslibs/np_datetime.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index e4ab4de1b8ee6..b1e4022527437 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -280,7 +280,7 @@ cdef int string_to_dts( int* out_tzoffset, bint want_exc, format: str | None=None, - bint exact=EXACT_MATCH, + bint exact=True, ) except? -1: cdef: Py_ssize_t length