Skip to content

Commit 45f82c3

Browse files
nikitavedMarco Gorelli
authored and
Marco Gorelli
committed
initial format support
Co-Authored-By: MarcoGorelli <> Co-Authored-By: FDRocha <>
1 parent 8564b70 commit 45f82c3

File tree

11 files changed

+239
-14
lines changed

11 files changed

+239
-14
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ Conversion
333333
- Bug in :meth:`Series.convert_dtypes` not converting dtype to nullable dtype when :class:`Series` contains ``NA`` and has dtype ``object`` (:issue:`48791`)
334334
- Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
335335
- Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
336+
- Bug in :func:`to_datetime` not respecting the ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`)
336337

337338
Strings
338339
^^^^^^^

pandas/_libs/tslib.pyx

+14-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def _test_parse_iso8601(ts: str):
8989
elif ts == 'today':
9090
return Timestamp.now().normalize()
9191

92-
string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True)
92+
string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True, "", False)
9393
obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts)
9494
check_dts_bounds(&obj.dts)
9595
if out_local == 1:
@@ -445,6 +445,8 @@ cpdef array_to_datetime(
445445
bint utc=False,
446446
bint require_iso8601=False,
447447
bint allow_mixed=False,
448+
str format="",
449+
bint exact=False,
448450
):
449451
"""
450452
Converts a 1D array of date-like values to a numpy array of either:
@@ -564,6 +566,15 @@ cpdef array_to_datetime(
564566
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
565567

566568
elif is_integer_object(val) or is_float_object(val):
569+
if require_iso8601:
570+
if is_coerce:
571+
iresult[i] = NPY_NAT
572+
continue
573+
elif is_raise:
574+
raise ValueError(
575+
f"time data \"{val}\" at position {i} doesn't match format \"{format}\""
576+
)
577+
return values, tz_out
567578
# these must be ns unit by-definition
568579
seen_integer = True
569580

@@ -594,7 +605,7 @@ cpdef array_to_datetime(
594605

595606
string_to_dts_failed = string_to_dts(
596607
val, &dts, &out_bestunit, &out_local,
597-
&out_tzoffset, False
608+
&out_tzoffset, False, format, exact
598609
)
599610
if string_to_dts_failed:
600611
# An error at this point is a _parsing_ error
@@ -609,7 +620,7 @@ cpdef array_to_datetime(
609620
continue
610621
elif is_raise:
611622
raise ValueError(
612-
f"time data \"{val}\" at position {i} doesn't match format specified"
623+
f"time data \"{val}\" at position {i} doesn't match format \"{format}\""
613624
)
614625
return values, tz_out
615626

pandas/_libs/tslibs/conversion.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
488488
else:
489489
string_to_dts_failed = string_to_dts(
490490
ts, &dts, &out_bestunit, &out_local,
491-
&out_tzoffset, False
491+
&out_tzoffset, False, "", False
492492
)
493493
if not string_to_dts_failed:
494494
try:

pandas/_libs/tslibs/np_datetime.pxd

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ cdef int string_to_dts(
9595
int* out_local,
9696
int* out_tzoffset,
9797
bint want_exc,
98+
str format,
99+
bint exact
98100
) except? -1
99101

100102
cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)

pandas/_libs/tslibs/np_datetime.pyx

+9-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ cdef extern from "src/datetime/np_datetime_strings.h":
5252
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
5353
npy_datetimestruct *out,
5454
NPY_DATETIMEUNIT *out_bestunit,
55-
int *out_local, int *out_tzoffset)
55+
int *out_local, int *out_tzoffset,
56+
const char *format, int format_len, int exact)
5657

5758

5859
# ----------------------------------------------------------------------
@@ -273,14 +274,20 @@ cdef inline int string_to_dts(
273274
int* out_local,
274275
int* out_tzoffset,
275276
bint want_exc,
277+
str format,
278+
bint exact,
276279
) except? -1:
277280
cdef:
278281
Py_ssize_t length
279282
const char* buf
283+
Py_ssize_t format_length
284+
const char* format_buf
280285

281286
buf = get_c_string_buf_and_size(val, &length)
287+
format_buf = get_c_string_buf_and_size(format, &format_length)
282288
return parse_iso_8601_datetime(buf, length, want_exc,
283-
dts, out_bestunit, out_local, out_tzoffset)
289+
dts, out_bestunit, out_local, out_tzoffset,
290+
format_buf, format_length, exact)
284291

285292

286293
cpdef ndarray astype_overflowsafe(

pandas/_libs/tslibs/parsing.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ cdef parse_datetime_string_with_reso(
409409
# TODO: does this render some/all of parse_delimited_date redundant?
410410
string_to_dts_failed = string_to_dts(
411411
date_string, &dts, &out_bestunit, &out_local,
412-
&out_tzoffset, False
412+
&out_tzoffset, False, "", False
413413
)
414414
if not string_to_dts_failed:
415415
if dts.ps != 0 or out_local:

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

+72-2
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,29 @@ This file implements string parsing and creation for NumPy datetime.
6666
*
6767
* Returns 0 on success, -1 on failure.
6868
*/
69+
70+
/*
 * Consume one expected character from the caller's `format` buffer.
 *
 * Must be expanded inside a function that has `format` (const char *),
 * `format_len` (int, number of characters left in `format`) and `exact`
 * (int flag) in scope, and that defines a `parse_error` label.
 *
 *  - If format characters remain, the next one must equal `ch`: on a match
 *    `format` is advanced and `format_len` decremented; on a mismatch
 *    control jumps to `parse_error`.
 *  - If the format is already exhausted, that is an error (jump to
 *    `parse_error`) only when `exact` is non-zero; otherwise nothing happens.
 *
 * Wrapped in do { ... } while (0) so that `FORMAT_STARTSWITH(x);` is exactly
 * one statement and is safe in an unbraced if/else. The last line carries no
 * continuation backslash, so the macro cannot absorb the line that follows.
 */
#define FORMAT_STARTSWITH(ch)                 \
    do {                                      \
        if (format_len > 0) {                 \
            if (*format != (ch)) {            \
                goto parse_error;             \
            }                                 \
            ++format;                         \
            --format_len;                     \
        } else if (exact) {                   \
            goto parse_error;                 \
        }                                     \
    } while (0)
86+
6987
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
7088
npy_datetimestruct *out,
7189
NPY_DATETIMEUNIT *out_bestunit,
72-
int *out_local, int *out_tzoffset) {
90+
int *out_local, int *out_tzoffset,
91+
const char* format, int format_len, int exact) {
7392
int year_leap = 0;
7493
int i, numdigits;
7594
const char *substr;
@@ -104,14 +123,19 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
104123
while (sublen > 0 && isspace(*substr)) {
105124
++substr;
106125
--sublen;
126+
FORMAT_STARTSWITH(' ');
107127
}
108128

109129
/* Leading '-' sign for negative year */
110130
if (*substr == '-') {
111131
++substr;
112132
--sublen;
133+
FORMAT_STARTSWITH('-');
113134
}
114135

136+
FORMAT_STARTSWITH('%');
137+
FORMAT_STARTSWITH('Y');
138+
115139
if (sublen == 0) {
116140
goto parse_error;
117141
}
@@ -139,6 +163,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
139163
if (out_local != NULL) {
140164
*out_local = 0;
141165
}
166+
if (format_len) {
167+
goto parse_error;
168+
}
142169
bestunit = NPY_FR_Y;
143170
goto finish;
144171
}
@@ -156,6 +183,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
156183
ymd_sep = valid_ymd_sep[i];
157184
++substr;
158185
--sublen;
186+
FORMAT_STARTSWITH(ymd_sep);
159187
/* Cannot have trailing separator */
160188
if (sublen == 0 || !isdigit(*substr)) {
161189
goto parse_error;
@@ -167,6 +195,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
167195
out->month = (*substr - '0');
168196
++substr;
169197
--sublen;
198+
FORMAT_STARTSWITH('%');
199+
FORMAT_STARTSWITH('m');
170200
/* Second digit optional if there was a separator */
171201
if (isdigit(*substr)) {
172202
out->month = 10 * out->month + (*substr - '0');
@@ -190,6 +220,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
190220
if (!has_ymd_sep) {
191221
goto parse_error;
192222
}
223+
if (format_len) {
224+
goto parse_error;
225+
}
193226
if (out_local != NULL) {
194227
*out_local = 0;
195228
}
@@ -203,6 +236,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
203236
}
204237
++substr;
205238
--sublen;
239+
FORMAT_STARTSWITH(ymd_sep);
206240
}
207241

208242
/* PARSE THE DAY */
@@ -213,6 +247,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
213247
out->day = (*substr - '0');
214248
++substr;
215249
--sublen;
250+
FORMAT_STARTSWITH('%');
251+
FORMAT_STARTSWITH('d');
216252
/* Second digit optional if there was a separator */
217253
if (isdigit(*substr)) {
218254
out->day = 10 * out->day + (*substr - '0');
@@ -235,13 +271,17 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
235271
if (out_local != NULL) {
236272
*out_local = 0;
237273
}
274+
if (format_len) {
275+
goto parse_error;
276+
}
238277
bestunit = NPY_FR_D;
239278
goto finish;
240279
}
241280

242281
if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
243282
goto parse_error;
244283
}
284+
FORMAT_STARTSWITH(*substr);
245285
++substr;
246286
--sublen;
247287

@@ -250,6 +290,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
250290
if (!isdigit(*substr)) {
251291
goto parse_error;
252292
}
293+
FORMAT_STARTSWITH('%');
294+
FORMAT_STARTSWITH('H');
253295
out->hour = (*substr - '0');
254296
++substr;
255297
--sublen;
@@ -274,6 +316,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
274316
if (!hour_was_2_digits) {
275317
goto parse_error;
276318
}
319+
if (format_len) {
320+
goto parse_error;
321+
}
277322
bestunit = NPY_FR_h;
278323
goto finish;
279324
}
@@ -286,6 +331,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
286331
if (sublen == 0 || !isdigit(*substr)) {
287332
goto parse_error;
288333
}
334+
FORMAT_STARTSWITH(':');
289335
} else if (!isdigit(*substr)) {
290336
if (!hour_was_2_digits) {
291337
goto parse_error;
@@ -298,6 +344,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
298344
out->min = (*substr - '0');
299345
++substr;
300346
--sublen;
347+
FORMAT_STARTSWITH('%');
348+
FORMAT_STARTSWITH('M');
301349
/* Second digit optional if there was a separator */
302350
if (isdigit(*substr)) {
303351
out->min = 10 * out->min + (*substr - '0');
@@ -317,12 +365,16 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
317365

318366
if (sublen == 0) {
319367
bestunit = NPY_FR_m;
368+
if (format_len) {
369+
goto parse_error;
370+
}
320371
goto finish;
321372
}
322373

323374
/* If we make it through this condition block, then the next
324375
* character is a digit. */
325376
if (has_hms_sep && *substr == ':') {
377+
FORMAT_STARTSWITH(':');
326378
++substr;
327379
--sublen;
328380
/* Cannot have a trailing ':' */
@@ -339,6 +391,8 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
339391
out->sec = (*substr - '0');
340392
++substr;
341393
--sublen;
394+
FORMAT_STARTSWITH('%');
395+
FORMAT_STARTSWITH('S');
342396
/* Second digit optional if there was a separator */
343397
if (isdigit(*substr)) {
344398
out->sec = 10 * out->sec + (*substr - '0');
@@ -360,12 +414,15 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
360414
if (sublen > 0 && *substr == '.') {
361415
++substr;
362416
--sublen;
417+
FORMAT_STARTSWITH('.');
363418
} else {
364419
bestunit = NPY_FR_s;
365420
goto parse_timezone;
366421
}
367422

368423
/* PARSE THE MICROSECONDS (0 to 6 digits) */
424+
FORMAT_STARTSWITH('%');
425+
FORMAT_STARTSWITH('f');
369426
numdigits = 0;
370427
for (i = 0; i < 6; ++i) {
371428
out->us *= 10;
@@ -430,15 +487,22 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
430487
while (sublen > 0 && isspace(*substr)) {
431488
++substr;
432489
--sublen;
490+
FORMAT_STARTSWITH(' ');
433491
}
434492

435493
if (sublen == 0) {
436494
// Unlike NumPy, treating no time zone as naive
495+
if (format_len > 0) {
496+
goto parse_error;
497+
}
437498
goto finish;
438499
}
439500

440501
/* UTC specifier */
441502
if (*substr == 'Z') {
503+
FORMAT_STARTSWITH('%');
504+
FORMAT_STARTSWITH('Z');
505+
442506
/* "Z" should be equivalent to tz offset "+00:00" */
443507
if (out_local != NULL) {
444508
*out_local = 1;
@@ -449,12 +513,17 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
449513
}
450514

451515
if (sublen == 1) {
516+
if (format_len > 0) {
517+
goto parse_error;
518+
}
452519
goto finish;
453520
} else {
454521
++substr;
455522
--sublen;
456523
}
457524
} else if (*substr == '-' || *substr == '+') {
525+
FORMAT_STARTSWITH('%');
526+
FORMAT_STARTSWITH('z');
458527
/* Time zone offset */
459528
int offset_neg = 0, offset_hour = 0, offset_minute = 0;
460529

@@ -538,9 +607,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
538607
while (sublen > 0 && isspace(*substr)) {
539608
++substr;
540609
--sublen;
610+
FORMAT_STARTSWITH(' ');
541611
}
542612

543-
if (sublen != 0) {
613+
if ((sublen != 0) || (format_len != 0)) {
544614
goto parse_error;
545615
}
546616

pandas/_libs/tslibs/src/datetime/np_datetime_strings.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc,
5858
npy_datetimestruct *out,
5959
NPY_DATETIMEUNIT *out_bestunit,
6060
int *out_local,
61-
int *out_tzoffset);
61+
int *out_tzoffset,
62+
const char* format,
63+
int format_len,
64+
int exact);
6265

6366
/*
6467
* Provides a string length to use for converting datetime

0 commit comments

Comments
 (0)