@@ -355,6 +355,8 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
355
355
* + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
356
356
* + Accepts special values "NaT" (not a time), "Today", (current
357
357
* day according to local time) and "Now" (current time in UTC).
358
+ * + ':' separator between hours, minutes, and seconds is optional. When
359
+ * omitted, each component must be 2 digits if it appears. (GH-10041)
358
360
*
359
361
* 'str' must be a NULL-terminated string, and 'len' must be its length.
360
362
* 'unit' should contain -1 if the unit is unknown, or the unit
@@ -394,15 +396,21 @@ parse_iso_8601_datetime(char *str, int len,
394
396
char * substr , sublen ;
395
397
PANDAS_DATETIMEUNIT bestunit ;
396
398
397
- /* if date components in are separated by one of valid separators
398
- * months/days without leadings 0s will be parsed
399
+ /* If year-month-day are separated by a valid separator,
400
+ * months/days without leading zeroes will be parsed
399
401
* (though not iso8601). If the components aren't separated,
400
- * an error code will be retuned because the date is ambigous
402
+ * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
403
+ * forbidden here (but parsed as YYMMDD elsewhere).
401
404
*/
402
- int has_sep = 0 ;
403
- char sep = '\0' ;
404
- char valid_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
405
- int valid_sep_len = 5 ;
405
+ int has_ymd_sep = 0 ;
406
+ char ymd_sep = '\0' ;
407
+ char valid_ymd_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
408
+ int valid_ymd_sep_len = sizeof (valid_ymd_sep );
409
+
410
+ /* hour-minute-second may or may not be separated by ':'. If not, then
411
+ * each component must be 2 digits. */
412
+ int has_hms_sep = 0 ;
413
+ int hour_was_2_digits = 0 ;
406
414
407
415
/* Initialize the output to all zeros */
408
416
memset (out , 0 , sizeof (pandas_datetimestruct ));
@@ -550,67 +558,58 @@ parse_iso_8601_datetime(char *str, int len,
550
558
/* Check whether it's a leap-year */
551
559
year_leap = is_leapyear (out -> year );
552
560
553
- /* Next character must be a separator, start of month or end */
561
+ /* Next character must be a separator, start of month, or end of string */
554
562
if (sublen == 0 ) {
555
563
if (out_local != NULL ) {
556
564
* out_local = 0 ;
557
565
}
558
566
bestunit = PANDAS_FR_Y ;
559
567
goto finish ;
560
568
}
561
- else if (!isdigit (* substr )) {
562
- for (i = 0 ; i < valid_sep_len ; ++ i ) {
563
- if (* substr == valid_sep [i ]) {
564
- has_sep = 1 ;
565
- sep = valid_sep [i ];
566
- ++ substr ;
567
- -- sublen ;
569
+
570
+ if (!isdigit (* substr )) {
571
+ for (i = 0 ; i < valid_ymd_sep_len ; ++ i ) {
572
+ if (* substr == valid_ymd_sep [i ]) {
568
573
break ;
569
574
}
570
575
}
571
- if (i == valid_sep_len ) {
576
+ if (i == valid_ymd_sep_len ) {
572
577
goto parse_error ;
573
578
}
574
- }
575
-
576
- /* Can't have a trailing sep */
577
- if (sublen == 0 ) {
578
- goto parse_error ;
579
- }
580
-
581
-
582
- /* PARSE THE MONTH (2 digits) */
583
- if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
584
- || (sublen == 1 && isdigit (substr [0 ])))) {
585
- out -> month = (substr [0 ] - '0' );
586
-
587
- if (out -> month < 1 ) {
588
- PyErr_Format (PyExc_ValueError ,
589
- "Month out of range in datetime string \"%s\"" , str );
590
- goto error ;
591
- }
579
+ has_ymd_sep = 1 ;
580
+ ymd_sep = valid_ymd_sep [i ];
592
581
++ substr ;
593
582
-- sublen ;
583
+ /* Cannot have trailing separator */
584
+ if (sublen == 0 || !isdigit (* substr )) {
585
+ goto parse_error ;
586
+ }
594
587
}
595
- else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
596
- out -> month = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
597
588
598
- if (out -> month < 1 || out -> month > 12 ) {
599
- PyErr_Format (PyExc_ValueError ,
600
- "Month out of range in datetime string \"%s\"" , str );
601
- goto error ;
602
- }
603
- substr += 2 ;
604
- sublen -= 2 ;
589
+ /* PARSE THE MONTH */
590
+ /* First digit required */
591
+ out -> month = (* substr - '0' );
592
+ ++ substr ;
593
+ -- sublen ;
594
+ /* Second digit optional if there was a separator */
595
+ if (isdigit (* substr )) {
596
+ out -> month = 10 * out -> month + (* substr - '0' );
597
+ ++ substr ;
598
+ -- sublen ;
605
599
}
606
- else {
600
+ else if (! has_ymd_sep ) {
607
601
goto parse_error ;
608
602
}
603
+ if (out -> month < 1 || out -> month > 12 ) {
604
+ PyErr_Format (PyExc_ValueError ,
605
+ "Month out of range in datetime string \"%s\"" , str );
606
+ goto error ;
607
+ }
609
608
610
- /* Next character must be a '-' or the end of the string */
609
+ /* Next character must be the separator, start of day, or end of string */
611
610
if (sublen == 0 ) {
612
- /* dates of form YYYYMM are not valid */
613
- if (!has_sep ) {
611
+ /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
612
+ if (!has_ymd_sep ) {
614
613
goto parse_error ;
615
614
}
616
615
if (out_local != NULL ) {
@@ -619,47 +618,40 @@ parse_iso_8601_datetime(char *str, int len,
619
618
bestunit = PANDAS_FR_M ;
620
619
goto finish ;
621
620
}
622
- else if (has_sep && * substr == sep ) {
621
+
622
+ if (has_ymd_sep ) {
623
+ /* Must have separator, but cannot be trailing */
624
+ if (* substr != ymd_sep || sublen == 1 ) {
625
+ goto parse_error ;
626
+ }
623
627
++ substr ;
624
628
-- sublen ;
625
629
}
626
- else if (!isdigit (* substr )) {
627
- goto parse_error ;
628
- }
629
630
630
- /* Can't have a trailing '-' */
631
- if (sublen == 0 ) {
632
- goto parse_error ;
631
+ /* PARSE THE DAY */
632
+ /* First digit required */
633
+ if (!isdigit (* substr )) {
634
+ goto parse_error ;
633
635
}
634
-
635
- /* PARSE THE DAY (2 digits) */
636
- if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
637
- || (sublen == 1 && isdigit (substr [0 ])))) {
638
- out -> day = (substr [0 ] - '0' );
639
-
640
- if (out -> day < 1 ) {
641
- PyErr_Format (PyExc_ValueError ,
642
- "Day out of range in datetime string \"%s\"" , str );
643
- goto error ;
644
- }
636
+ out -> day = (* substr - '0' );
637
+ ++ substr ;
638
+ -- sublen ;
639
+ /* Second digit optional if there was a separator */
640
+ if (isdigit (* substr )) {
641
+ out -> day = 10 * out -> day + (* substr - '0' );
645
642
++ substr ;
646
643
-- sublen ;
647
644
}
648
- else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
649
- out -> day = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
650
-
651
- if (out -> day < 1 ||
652
- out -> day > days_per_month_table [year_leap ][out -> month - 1 ]) {
653
- PyErr_Format (PyExc_ValueError ,
654
- "Day out of range in datetime string \"%s\"" , str );
655
- goto error ;
656
- }
657
- substr += 2 ;
658
- sublen -= 2 ;
659
- }
660
- else {
645
+ else if (!has_ymd_sep ) {
661
646
goto parse_error ;
662
647
}
648
+ if (out -> day < 1 ||
649
+ out -> day > days_per_month_table [year_leap ][out -> month - 1 ])
650
+ {
651
+ PyErr_Format (PyExc_ValueError ,
652
+ "Day out of range in datetime string \"%s\"" , str );
653
+ goto error ;
654
+ }
663
655
664
656
/* Next character must be a 'T', ' ', or end of string */
665
657
if (sublen == 0 ) {
@@ -669,104 +661,119 @@ parse_iso_8601_datetime(char *str, int len,
669
661
bestunit = PANDAS_FR_D ;
670
662
goto finish ;
671
663
}
672
- else if (* substr != 'T' && * substr != ' ' ) {
664
+
665
+ if ((* substr != 'T' && * substr != ' ' ) || sublen == 1 ) {
673
666
goto parse_error ;
674
667
}
675
- else {
668
+ ++ substr ;
669
+ -- sublen ;
670
+
671
+ /* PARSE THE HOURS */
672
+ /* First digit required */
673
+ if (!isdigit (* substr )) {
674
+ goto parse_error ;
675
+ }
676
+ out -> hour = (* substr - '0' );
677
+ ++ substr ;
678
+ -- sublen ;
679
+ /* Second digit optional */
680
+ if (isdigit (* substr )) {
681
+ hour_was_2_digits = 1 ;
682
+ out -> hour = 10 * out -> hour + (* substr - '0' );
676
683
++ substr ;
677
684
-- sublen ;
678
- }
679
-
680
- /* PARSE THE HOURS (2 digits) */
681
- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
682
- out -> hour = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
683
-
684
685
if (out -> hour >= 24 ) {
685
686
PyErr_Format (PyExc_ValueError ,
686
687
"Hours out of range in datetime string \"%s\"" , str );
687
688
goto error ;
688
689
}
689
- substr += 2 ;
690
- sublen -= 2 ;
691
- }
692
- else if (sublen >= 1 && isdigit (substr [0 ])) {
693
- out -> hour = substr [0 ] - '0' ;
694
- ++ substr ;
695
- -- sublen ;
696
- }
697
- else {
698
- goto parse_error ;
699
690
}
700
691
701
692
/* Next character must be a ':', the first digit of the minutes, or the end of the string; anything else is left for parse_timezone */
702
- if (sublen > 0 && * substr == ':' ) {
693
+ if (sublen == 0 ) {
694
+ if (!hour_was_2_digits ) {
695
+ goto parse_error ;
696
+ }
697
+ bestunit = PANDAS_FR_h ;
698
+ goto finish ;
699
+ }
700
+
701
+ if (* substr == ':' ) {
702
+ has_hms_sep = 1 ;
703
703
++ substr ;
704
704
-- sublen ;
705
+ /* Cannot have a trailing separator */
706
+ if (sublen == 0 || !isdigit (* substr )) {
707
+ goto parse_error ;
708
+ }
705
709
}
706
- else {
710
+ else if (!isdigit (* substr )) {
711
+ if (!hour_was_2_digits ) {
712
+ goto parse_error ;
713
+ }
707
714
bestunit = PANDAS_FR_h ;
708
715
goto parse_timezone ;
709
716
}
710
717
711
- /* Can't have a trailing ':' */
712
- if (sublen == 0 ) {
713
- goto parse_error ;
714
- }
715
-
716
- /* PARSE THE MINUTES (2 digits) */
717
- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
718
- out -> min = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
719
-
718
+ /* PARSE THE MINUTES */
719
+ /* First digit required */
720
+ out -> min = (* substr - '0' );
721
+ ++ substr ;
722
+ -- sublen ;
723
+ /* Second digit optional if there was a separator */
724
+ if (isdigit (* substr )) {
725
+ out -> min = 10 * out -> min + (* substr - '0' );
726
+ ++ substr ;
727
+ -- sublen ;
720
728
if (out -> min >= 60 ) {
721
729
PyErr_Format (PyExc_ValueError ,
722
- "Minutes out of range in datetime string \"%s\"" , str );
730
+ "Minutes out of range in datetime string \"%s\"" , str );
723
731
goto error ;
724
732
}
725
- substr += 2 ;
726
- sublen -= 2 ;
727
- }
728
- else if (sublen >= 1 && isdigit (substr [0 ])) {
729
- out -> min = substr [0 ] - '0' ;
730
- ++ substr ;
731
- -- sublen ;
732
733
}
733
- else {
734
+ else if (! has_hms_sep ) {
734
735
goto parse_error ;
735
736
}
736
737
737
- /* Next character must be a ':' or the end of the string */
738
- if (sublen > 0 && * substr == ':' ) {
738
+ if (sublen == 0 ) {
739
+ bestunit = PANDAS_FR_m ;
740
+ goto finish ;
741
+ }
742
+
743
+ /* If we make it through this condition block, then the next
744
+ * character is a digit. */
745
+ if (has_hms_sep && * substr == ':' ) {
739
746
++ substr ;
740
747
-- sublen ;
748
+ /* Cannot have a trailing ':' */
749
+ if (sublen == 0 || !isdigit (* substr )) {
750
+ goto parse_error ;
751
+ }
752
+ }
753
+ else if (!has_hms_sep && isdigit (* substr )) {
741
754
}
742
755
else {
743
756
bestunit = PANDAS_FR_m ;
744
757
goto parse_timezone ;
745
758
}
746
759
747
- /* Can't have a trailing ':' */
748
- if (sublen == 0 ) {
749
- goto parse_error ;
750
- }
751
-
752
- /* PARSE THE SECONDS (2 digits) */
753
- if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
754
- out -> sec = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
755
-
760
+ /* PARSE THE SECONDS */
761
+ /* First digit required */
762
+ out -> sec = (* substr - '0' );
763
+ ++ substr ;
764
+ -- sublen ;
765
+ /* Second digit optional if there was a separator */
766
+ if (isdigit (* substr )) {
767
+ out -> sec = 10 * out -> sec + (* substr - '0' );
768
+ ++ substr ;
769
+ -- sublen ;
756
770
if (out -> sec >= 60 ) {
757
771
PyErr_Format (PyExc_ValueError ,
758
- "Seconds out of range in datetime string \"%s\"" , str );
772
+ "Seconds out of range in datetime string \"%s\"" , str );
759
773
goto error ;
760
774
}
761
- substr += 2 ;
762
- sublen -= 2 ;
763
- }
764
- else if (sublen >= 1 && isdigit (substr [0 ])) {
765
- out -> sec = substr [0 ] - '0' ;
766
- ++ substr ;
767
- -- sublen ;
768
775
}
769
- else {
776
+ else if (! has_hms_sep ) {
770
777
goto parse_error ;
771
778
}
772
779
0 commit comments