@@ -66,10 +66,45 @@ This file implements string parsing and creation for NumPy datetime.
66
66
*
67
67
* Returns 0 on success, -1 on failure.
68
68
*/
69
+
70
// Match the next `n` characters of `*format` against `compare_to`.
//
// Parameters:
//   format                in/out cursor into the format string. Advanced by
//                         `n` only when the characters match.
//   characters_remaining  in/out count of characters still available at
//                         `*format`. Decremented by `n` only on a match.
//   compare_to            the expected characters (compared with strncmp, so
//                         comparison stops early at a shared NUL terminator).
//   n                     number of characters to compare; must be >= 0.
//   exact                 non-zero: fewer than `n` characters remaining is an
//                         error. Zero: a completely exhausted format
//                         (0 characters remaining) is accepted and nothing is
//                         consumed; a partially available token is still an
//                         error.
//
// Returns 0 on a match (or on an exhausted format when `exact` is zero) and
// -1 otherwise. On -1 neither `*format` nor `*characters_remaining` is
// modified.
static int compare_format(const char **format, int *characters_remaining,
                          const char *compare_to, int n, const int exact) {
    // A negative length would turn into a huge size_t for strncmp and would
    // move the cursor backwards below; reject it up front.
    if (n < 0) {
        return -1;
    }

    if (*characters_remaining < n) {
        // TODO(pandas-dev): in the future we should set a PyErr here to be
        //                   very clear about what went wrong. The exact and
        //                   the "partial token left" cases share a return
        //                   value today but deserve distinct messages.
        if (exact || *characters_remaining) {
            return -1;
        }
        // Non-exact matching and the format is fully consumed: nothing left
        // to compare against, so accept without advancing.
        return 0;
    }

    if (strncmp(*format, compare_to, (size_t)n)) {
        // TODO(pandas-dev): PyErr to differentiate what went wrong
        return -1;
    }

    *format += n;
    *characters_remaining -= n;
    return 0;
}
100
+
69
101
int parse_iso_8601_datetime (const char * str , int len , int want_exc ,
70
102
npy_datetimestruct * out ,
71
103
NPY_DATETIMEUNIT * out_bestunit ,
72
- int * out_local , int * out_tzoffset ) {
104
+ int * out_local , int * out_tzoffset ,
105
+ const char * format , int format_len , int exact ) {
106
+ if (len < 0 || format_len < 0 )
107
+ goto parse_error ;
73
108
int year_leap = 0 ;
74
109
int i , numdigits ;
75
110
const char * substr ;
@@ -104,6 +139,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
104
139
while (sublen > 0 && isspace (* substr )) {
105
140
++ substr ;
106
141
-- sublen ;
142
+ if (compare_format (& format , & format_len , " " , 1 , exact )) {
143
+ goto parse_error ;
144
+ }
107
145
}
108
146
109
147
/* Leading '-' sign for negative year */
@@ -117,6 +155,10 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
117
155
}
118
156
119
157
/* PARSE THE YEAR (4 digits) */
158
+ if (compare_format (& format , & format_len , "%Y" , 2 , exact )) {
159
+ goto parse_error ;
160
+ }
161
+
120
162
out -> year = 0 ;
121
163
if (sublen >= 4 && isdigit (substr [0 ]) && isdigit (substr [1 ]) &&
122
164
isdigit (substr [2 ]) && isdigit (substr [3 ])) {
@@ -139,6 +181,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
139
181
if (out_local != NULL ) {
140
182
* out_local = 0 ;
141
183
}
184
+ if (format_len ) {
185
+ goto parse_error ;
186
+ }
142
187
bestunit = NPY_FR_Y ;
143
188
goto finish ;
144
189
}
@@ -156,13 +201,20 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
156
201
ymd_sep = valid_ymd_sep [i ];
157
202
++ substr ;
158
203
-- sublen ;
204
+
205
+ if (compare_format (& format , & format_len , & ymd_sep , 1 , exact )) {
206
+ goto parse_error ;
207
+ }
159
208
/* Cannot have trailing separator */
160
209
if (sublen == 0 || !isdigit (* substr )) {
161
210
goto parse_error ;
162
211
}
163
212
}
164
213
165
214
/* PARSE THE MONTH */
215
+ if (compare_format (& format , & format_len , "%m" , 2 , exact )) {
216
+ goto parse_error ;
217
+ }
166
218
/* First digit required */
167
219
out -> month = (* substr - '0' );
168
220
++ substr ;
@@ -190,6 +242,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
190
242
if (!has_ymd_sep ) {
191
243
goto parse_error ;
192
244
}
245
+ if (format_len ) {
246
+ goto parse_error ;
247
+ }
193
248
if (out_local != NULL ) {
194
249
* out_local = 0 ;
195
250
}
@@ -203,9 +258,15 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
203
258
}
204
259
++ substr ;
205
260
-- sublen ;
261
+ if (compare_format (& format , & format_len , & ymd_sep , 1 , exact )) {
262
+ goto parse_error ;
263
+ }
206
264
}
207
265
208
266
/* PARSE THE DAY */
267
+ if (compare_format (& format , & format_len , "%d" , 2 , exact )) {
268
+ goto parse_error ;
269
+ }
209
270
/* First digit required */
210
271
if (!isdigit (* substr )) {
211
272
goto parse_error ;
@@ -235,17 +296,26 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
235
296
if (out_local != NULL ) {
236
297
* out_local = 0 ;
237
298
}
299
+ if (format_len ) {
300
+ goto parse_error ;
301
+ }
238
302
bestunit = NPY_FR_D ;
239
303
goto finish ;
240
304
}
241
305
242
306
if ((* substr != 'T' && * substr != ' ' ) || sublen == 1 ) {
243
307
goto parse_error ;
244
308
}
309
+ if (compare_format (& format , & format_len , substr , 1 , exact )) {
310
+ goto parse_error ;
311
+ }
245
312
++ substr ;
246
313
-- sublen ;
247
314
248
315
/* PARSE THE HOURS */
316
+ if (compare_format (& format , & format_len , "%H" , 2 , exact )) {
317
+ goto parse_error ;
318
+ }
249
319
/* First digit required */
250
320
if (!isdigit (* substr )) {
251
321
goto parse_error ;
@@ -274,6 +344,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
274
344
if (!hour_was_2_digits ) {
275
345
goto parse_error ;
276
346
}
347
+ if (format_len ) {
348
+ goto parse_error ;
349
+ }
277
350
bestunit = NPY_FR_h ;
278
351
goto finish ;
279
352
}
@@ -286,6 +359,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
286
359
if (sublen == 0 || !isdigit (* substr )) {
287
360
goto parse_error ;
288
361
}
362
+ if (compare_format (& format , & format_len , ":" , 1 , exact )) {
363
+ goto parse_error ;
364
+ }
289
365
} else if (!isdigit (* substr )) {
290
366
if (!hour_was_2_digits ) {
291
367
goto parse_error ;
@@ -294,6 +370,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
294
370
}
295
371
296
372
/* PARSE THE MINUTES */
373
+ if (compare_format (& format , & format_len , "%M" , 2 , exact )) {
374
+ goto parse_error ;
375
+ }
297
376
/* First digit required */
298
377
out -> min = (* substr - '0' );
299
378
++ substr ;
@@ -317,12 +396,18 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
317
396
318
397
if (sublen == 0 ) {
319
398
bestunit = NPY_FR_m ;
399
+ if (format_len ) {
400
+ goto parse_error ;
401
+ }
320
402
goto finish ;
321
403
}
322
404
323
405
/* If we make it through this condition block, then the next
324
406
* character is a digit. */
325
407
if (has_hms_sep && * substr == ':' ) {
408
+ if (compare_format (& format , & format_len , ":" , 1 , exact )) {
409
+ goto parse_error ;
410
+ }
326
411
++ substr ;
327
412
-- sublen ;
328
413
/* Cannot have a trailing ':' */
@@ -335,6 +420,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
335
420
}
336
421
337
422
/* PARSE THE SECONDS */
423
+ if (compare_format (& format , & format_len , "%S" , 2 , exact )) {
424
+ goto parse_error ;
425
+ }
338
426
/* First digit required */
339
427
out -> sec = (* substr - '0' );
340
428
++ substr ;
@@ -360,12 +448,18 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
360
448
if (sublen > 0 && * substr == '.' ) {
361
449
++ substr ;
362
450
-- sublen ;
451
+ if (compare_format (& format , & format_len , "." , 1 , exact )) {
452
+ goto parse_error ;
453
+ }
363
454
} else {
364
455
bestunit = NPY_FR_s ;
365
456
goto parse_timezone ;
366
457
}
367
458
368
459
/* PARSE THE MICROSECONDS (0 to 6 digits) */
460
+ if (compare_format (& format , & format_len , "%f" , 2 , exact )) {
461
+ goto parse_error ;
462
+ }
369
463
numdigits = 0 ;
370
464
for (i = 0 ; i < 6 ; ++ i ) {
371
465
out -> us *= 10 ;
@@ -430,15 +524,24 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
430
524
while (sublen > 0 && isspace (* substr )) {
431
525
++ substr ;
432
526
-- sublen ;
527
+ if (compare_format (& format , & format_len , " " , 1 , exact )) {
528
+ goto parse_error ;
529
+ }
433
530
}
434
531
435
532
if (sublen == 0 ) {
436
533
// Unlike NumPy, treating no time zone as naive
534
+ if (format_len > 0 ) {
535
+ goto parse_error ;
536
+ }
437
537
goto finish ;
438
538
}
439
539
440
540
/* UTC specifier */
441
541
if (* substr == 'Z' ) {
542
+ if (compare_format (& format , & format_len , "%Z" , 2 , exact )) {
543
+ goto parse_error ;
544
+ }
442
545
/* "Z" should be equivalent to tz offset "+00:00" */
443
546
if (out_local != NULL ) {
444
547
* out_local = 1 ;
@@ -449,12 +552,18 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
449
552
}
450
553
451
554
if (sublen == 1 ) {
555
+ if (format_len > 0 ) {
556
+ goto parse_error ;
557
+ }
452
558
goto finish ;
453
559
} else {
454
560
++ substr ;
455
561
-- sublen ;
456
562
}
457
563
} else if (* substr == '-' || * substr == '+' ) {
564
+ if (compare_format (& format , & format_len , "%z" , 2 , exact )) {
565
+ goto parse_error ;
566
+ }
458
567
/* Time zone offset */
459
568
int offset_neg = 0 , offset_hour = 0 , offset_minute = 0 ;
460
569
@@ -538,9 +647,12 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
538
647
while (sublen > 0 && isspace (* substr )) {
539
648
++ substr ;
540
649
-- sublen ;
650
+ if (compare_format (& format , & format_len , " " , 1 , exact )) {
651
+ goto parse_error ;
652
+ }
541
653
}
542
654
543
- if (sublen != 0 ) {
655
+ if (( sublen != 0 ) || ( format_len != 0 ) ) {
544
656
goto parse_error ;
545
657
}
546
658
0 commit comments