pandas-dev · MarcoGorelli · Nov 17, 2022 · Oct 26, 2022 · Oct 28, 2022 · Oct 28, 2022
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -48,7 +48,14 @@ repos:
         # this particular codebase (e.g. src/headers, src/klib). However,
         # we can lint all header files since they aren't "generated" like C files are.
         exclude: ^pandas/_libs/src/(klib|headers)/
-        args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
+        args: [
+            --quiet,
+            '--extensions=c,h',
+            '--headers=h',
+            --recursive,
+            '--filter=-readability/casting,-runtime/int,-build/include_subdir',
+            '--linelength=88'
+        ]
 -   repo: https://github.com/PyCQA/flake8
     rev: 5.0.4
     hooks:

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -354,6 +354,7 @@ Conversion
 - Bug in :meth:`Series.convert_dtypes` not converting dtype to nullable dtype when :class:`Series` contains ``NA`` and has dtype ``object`` (:issue:`48791`)
 - Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
 - Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
+- Bug in :func:`to_datetime` not respecting the ``exact`` argument when ``format`` is an ISO8601 format (:issue:`12649`)
 
 Strings
 ^^^^^^^

diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi
@@ -25,6 +25,8 @@ def array_to_datetime(
     utc: bool = ...,
     require_iso8601: bool = ...,
     allow_mixed: bool = ...,
+    format: str = ...,
+    exact: bool = ...,
 ) -> tuple[np.ndarray, tzinfo | None]: ...
 
 # returned ndarray may be object dtype or datetime64[ns]
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -445,6 +445,8 @@ cpdef array_to_datetime(
     bint utc=False,
     bint require_iso8601=False,
     bint allow_mixed=False,
+    format: str | None=None,
+    bint exact=True,
 ):
     """
     Converts a 1D array of date-like values to a numpy array of either:
@@ -564,6 +566,15 @@ cpdef array_to_datetime(
                     iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
 
                 elif is_integer_object(val) or is_float_object(val):
+                    if require_iso8601:
+                        if is_coerce:
+                            iresult[i] = NPY_NAT
+                            continue
+                        elif is_raise:
+                            raise ValueError(
+                                f"time data \"{val}\" at position {i} doesn't match format \"{format}\""
+                            )
+                        return values, tz_out
                     # these must be ns unit by-definition
                     seen_integer = True
 
@@ -594,7 +605,7 @@ cpdef array_to_datetime(
 
                     string_to_dts_failed = string_to_dts(
                         val, &dts, &out_bestunit, &out_local,
-                        &out_tzoffset, False
+                        &out_tzoffset, False, format, exact
                     )
                     if string_to_dts_failed:
                         # An error at this point is a _parsing_ error
@@ -609,7 +620,7 @@ cpdef array_to_datetime(
                                 continue
                             elif is_raise:
                                 raise ValueError(
-                                    f"time data \"{val}\" at position {i} doesn't match format specified"
+                                    f"time data \"{val}\" at position {i} doesn't match format \"{format}\""
                                 )
                             return values, tz_out
 

diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd
@@ -95,6 +95,8 @@ cdef int string_to_dts(
     int* out_local,
     int* out_tzoffset,
     bint want_exc,
+    format: str | None = *,
+    bint exact = *
 ) except? -1
 
 cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)

diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
@@ -52,7 +52,8 @@ cdef extern from "src/datetime/np_datetime_strings.h":
     int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                                 npy_datetimestruct *out,
                                 NPY_DATETIMEUNIT *out_bestunit,
-                                int *out_local, int *out_tzoffset)
+                                int *out_local, int *out_tzoffset,
+                                const char *format, int format_len, int exact)
 
 
 # ----------------------------------------------------------------------
@@ -273,14 +274,25 @@ cdef inline int string_to_dts(
     int* out_local,
     int* out_tzoffset,
     bint want_exc,
+    format: str | None = None,
+    bint exact = True,
 ) except? -1:
     cdef:
         Py_ssize_t length
         const char* buf
+        Py_ssize_t format_length
+        const char* format_buf
 
     buf = get_c_string_buf_and_size(val, &length)
+    if format is None:
+        format_buf = b''
+        format_length = 0
+        exact = False
+    else:
+        format_buf = get_c_string_buf_and_size(format, &format_length)
     return parse_iso_8601_datetime(buf, length, want_exc,
-                                   dts, out_bestunit, out_local, out_tzoffset)
+                                   dts, out_bestunit, out_local, out_tzoffset,
+                                   format_buf, format_length, exact)
 
 
 cpdef ndarray astype_overflowsafe(

diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
@@ -66,10 +66,25 @@ This file implements string parsing and creation for NumPy datetime.
  *
  * Returns 0 on success, -1 on failure.
  */
+
+static inline int format_startswith(char ch, int format_len, char format, int exact) {
+    /* Return 1 if the current format character `format` matches `ch`, else 0.
+
+    A mismatch is reported when the format is not exhausted and its current
+    character differs from `ch`, or when the format is exhausted in the exact
+    case. An exhausted format matches anything in the non-exact case.
+    This function never advances the format; the caller must do so. */
+    if ((format_len && format != ch) || (exact && !format_len)) {
+        return 0;
+    }
+    return 1;
+}
+
 int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                             npy_datetimestruct *out,
                             NPY_DATETIMEUNIT *out_bestunit,
-                            int *out_local, int *out_tzoffset) {
+                            int *out_local, int *out_tzoffset,
+                            const char* format, int format_len, int exact) {
     int year_leap = 0;
     int i, numdigits;
     const char *substr;
@@ -104,19 +119,28 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     while (sublen > 0 && isspace(*substr)) {
         ++substr;
         --sublen;
+        if (!format_startswith(' ', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
     }
 
     /* Leading '-' sign for negative year */
     if (*substr == '-') {
         ++substr;
         --sublen;
+        if (!format_startswith('-', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
     }
 
     if (sublen == 0) {
         goto parse_error;
     }
 
     /* PARSE THE YEAR (4 digits) */
+    if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+    if (!format_startswith('Y', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+
     out->year = 0;
     if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) &&
         isdigit(substr[2]) && isdigit(substr[3])) {
@@ -139,6 +163,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         if (out_local != NULL) {
             *out_local = 0;
         }
+        if (format_len) {
+          goto parse_error;
+        }
         bestunit = NPY_FR_Y;
         goto finish;
     }
@@ -156,13 +183,19 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         ymd_sep = valid_ymd_sep[i];
         ++substr;
         --sublen;
+        if (!format_startswith(ymd_sep, format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
         /* Cannot have trailing separator */
         if (sublen == 0 || !isdigit(*substr)) {
             goto parse_error;
         }
     }
 
     /* PARSE THE MONTH */
+    if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+    if (!format_startswith('m', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
     /* First digit required */
     out->month = (*substr - '0');
     ++substr;
@@ -190,6 +223,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         if (!has_ymd_sep) {
             goto parse_error;
         }
+        if (format_len) {
+          goto parse_error;
+        }
         if (out_local != NULL) {
             *out_local = 0;
         }
@@ -203,9 +239,15 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         }
         ++substr;
         --sublen;
+        if (!format_startswith(ymd_sep, format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
     }
 
     /* PARSE THE DAY */
+    if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+    if (!format_startswith('d', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
     /* First digit required */
     if (!isdigit(*substr)) {
         goto parse_error;
@@ -235,17 +277,26 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         if (out_local != NULL) {
             *out_local = 0;
         }
+        if (format_len) {
+          goto parse_error;
+        }
         bestunit = NPY_FR_D;
         goto finish;
     }
 
     if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
         goto parse_error;
     }
+    if (!format_startswith(*substr, format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
     ++substr;
     --sublen;
 
     /* PARSE THE HOURS */
+    if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+    if (!format_startswith('H', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
     /* First digit required */
     if (!isdigit(*substr)) {
         goto parse_error;
@@ -274,6 +325,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         if (!hour_was_2_digits) {
             goto parse_error;
         }
+        if (format_len) {
+          goto parse_error;
+        }
         bestunit = NPY_FR_h;
         goto finish;
     }
@@ -286,6 +340,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         if (sublen == 0 || !isdigit(*substr)) {
             goto parse_error;
         }
+        if (!format_startswith(':', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
     } else if (!isdigit(*substr)) {
         if (!hour_was_2_digits) {
             goto parse_error;
@@ -294,6 +350,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE MINUTES */
+    if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+    if (!format_startswith('M', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
     /* First digit required */
     out->min = (*substr - '0');
     ++substr;
@@ -317,12 +377,17 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
 
     if (sublen == 0) {
         bestunit = NPY_FR_m;
+        if (format_len) {
+          goto parse_error;
+        }
         goto finish;
     }
 
     /* If we make it through this condition block, then the next
      * character is a digit. */
     if (has_hms_sep && *substr == ':') {
+        if (!format_startswith(':', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
         ++substr;
         --sublen;
         /* Cannot have a trailing ':' */
@@ -335,6 +400,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE SECONDS */
+    if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+    if (!format_startswith('S', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
     /* First digit required */
     out->sec = (*substr - '0');
     ++substr;
@@ -360,12 +429,18 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     if (sublen > 0 && *substr == '.') {
         ++substr;
         --sublen;
+        if (!format_startswith('.', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
     } else {
         bestunit = NPY_FR_s;
         goto parse_timezone;
     }
 
     /* PARSE THE MICROSECONDS (0 to 6 digits) */
+    if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
+    if (!format_startswith('f', format_len, *format, exact)) goto parse_error;
+    if (format_len) {++format; --format_len;}
     numdigits = 0;
     for (i = 0; i < 6; ++i) {
         out->us *= 10;
@@ -430,15 +505,24 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     while (sublen > 0 && isspace(*substr)) {
         ++substr;
         --sublen;
+        if (!format_startswith(' ', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
     }
 
     if (sublen == 0) {
         // Unlike NumPy, treating no time zone as naive
+        if (format_len > 0) {
+            goto parse_error;
+        }
         goto finish;
     }
 
     /* UTC specifier */
     if (*substr == 'Z') {
+        if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
+        if (!format_startswith('Z', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
         /* "Z" should be equivalent to tz offset "+00:00" */
         if (out_local != NULL) {
             *out_local = 1;
@@ -449,12 +533,19 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         }
 
         if (sublen == 1) {
+            if (format_len > 0) {
+                goto parse_error;
+            }
             goto finish;
         } else {
             ++substr;
             --sublen;
         }
     } else if (*substr == '-' || *substr == '+') {
+        if (!format_startswith('%', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
+        if (!format_startswith('z', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
         /* Time zone offset */
         int offset_neg = 0, offset_hour = 0, offset_minute = 0;
 
@@ -538,9 +629,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     while (sublen > 0 && isspace(*substr)) {
         ++substr;
         --sublen;
+        if (!format_startswith(' ', format_len, *format, exact)) goto parse_error;
+        if (format_len) {++format; --format_len;}
     }
 
-    if (sublen != 0) {
+    if ((sublen != 0) || (format_len != 0)) {
         goto parse_error;
     }