@@ -355,8 +355,6 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
355
355
* + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
356
356
* + Accepts special values "NaT" (not a time), "Today", (current
357
357
* day according to local time) and "Now" (current time in UTC).
358
- * + ':' separator between hours, minutes, and seconds is optional. When
359
- * omitted, each component must be 2 digits if it appears. (GH-10041)
360
358
*
361
359
* 'str' must be a NULL-terminated string, and 'len' must be its length.
362
360
* 'unit' should contain -1 if the unit is unknown, or the unit
@@ -396,21 +394,15 @@ parse_iso_8601_datetime(char *str, int len,
396
394
char * substr , sublen ;
397
395
PANDAS_DATETIMEUNIT bestunit ;
398
396
399
- /* If year-month-day are separated by a valid separator,
400
- * months/days without leading zeroes will be parsed
397
+ /* if date components are separated by one of the valid separators
398
+ * months/days without leading 0s will be parsed
401
399
* (though not iso8601). If the components aren't separated,
402
- * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
403
- * forbidden here (but parsed as YYMMDD elsewhere).
400
+ * an error code will be returned because the date is ambiguous
404
401
*/
405
- int has_ymd_sep = 0 ;
406
- char ymd_sep = '\0' ;
407
- char valid_ymd_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
408
- int valid_ymd_sep_len = sizeof (valid_ymd_sep );
409
-
410
- /* hour-minute-second may or may not separated by ':'. If not, then
411
- * each component must be 2 digits. */
412
- int has_hms_sep = 0 ;
413
- int hour_was_2_digits = 0 ;
402
+ int has_sep = 0 ;
403
+ char sep = '\0' ;
404
+ char valid_sep [] = {'-' , '.' , '/' , '\\' , ' ' };
405
+ int valid_sep_len = 5 ;
414
406
415
407
/* Initialize the output to all zeros */
416
408
memset (out , 0 , sizeof (pandas_datetimestruct ));
@@ -558,58 +550,67 @@ parse_iso_8601_datetime(char *str, int len,
558
550
/* Check whether it's a leap-year */
559
551
year_leap = is_leapyear (out -> year );
560
552
561
- /* Next character must be a separator, start of month, or end of string */
553
+ /* Next character must be a separator, start of month or end */
562
554
if (sublen == 0 ) {
563
555
if (out_local != NULL ) {
564
556
* out_local = 0 ;
565
557
}
566
558
bestunit = PANDAS_FR_Y ;
567
559
goto finish ;
568
560
}
569
-
570
- if (!isdigit (* substr )) {
571
- for (i = 0 ; i < valid_ymd_sep_len ; ++ i ) {
572
- if (* substr == valid_ymd_sep [i ]) {
561
+ else if (!isdigit (* substr )) {
562
+ for (i = 0 ; i < valid_sep_len ; ++ i ) {
563
+ if (* substr == valid_sep [i ]) {
564
+ has_sep = 1 ;
565
+ sep = valid_sep [i ];
566
+ ++ substr ;
567
+ -- sublen ;
573
568
break ;
574
569
}
575
570
}
576
- if (i == valid_ymd_sep_len ) {
577
- goto parse_error ;
578
- }
579
- has_ymd_sep = 1 ;
580
- ymd_sep = valid_ymd_sep [i ];
581
- ++ substr ;
582
- -- sublen ;
583
- /* Cannot have trailing separator */
584
- if (sublen == 0 || !isdigit (* substr )) {
571
+ if (i == valid_sep_len ) {
585
572
goto parse_error ;
586
573
}
587
574
}
588
575
589
- /* PARSE THE MONTH */
590
- /* First digit required */
591
- out -> month = (* substr - '0' );
592
- ++ substr ;
593
- -- sublen ;
594
- /* Second digit optional if there was a separator */
595
- if (isdigit (* substr )) {
596
- out -> month = 10 * out -> month + (* substr - '0' );
576
+ /* Can't have a trailing sep */
577
+ if (sublen == 0 ) {
578
+ goto parse_error ;
579
+ }
580
+
581
+
582
+ /* PARSE THE MONTH (1 or 2 digits after a separator, otherwise exactly 2) */
583
+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
584
+ || (sublen == 1 && isdigit (substr [0 ])))) {
585
+ out -> month = (substr [0 ] - '0' );
586
+
587
+ if (out -> month < 1 ) {
588
+ PyErr_Format (PyExc_ValueError ,
589
+ "Month out of range in datetime string \"%s\"" , str );
590
+ goto error ;
591
+ }
597
592
++ substr ;
598
593
-- sublen ;
599
594
}
600
- else if (!has_ymd_sep ) {
601
- goto parse_error ;
595
+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
596
+ out -> month = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
597
+
598
+ if (out -> month < 1 || out -> month > 12 ) {
599
+ PyErr_Format (PyExc_ValueError ,
600
+ "Month out of range in datetime string \"%s\"" , str );
601
+ goto error ;
602
+ }
603
+ substr += 2 ;
604
+ sublen -= 2 ;
602
605
}
603
- if (out -> month < 1 || out -> month > 12 ) {
604
- PyErr_Format (PyExc_ValueError ,
605
- "Month out of range in datetime string \"%s\"" , str );
606
- goto error ;
606
+ else {
607
+ goto parse_error ;
607
608
}
608
609
609
- /* Next character must be the separator, start of day, or end of string */
610
+ /* Next character must be the separator, the start of the day, or the end of the string */
610
611
if (sublen == 0 ) {
611
- /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
612
- if (!has_ymd_sep ) {
612
+ /* dates of form YYYYMM are not valid */
613
+ if (!has_sep ) {
613
614
goto parse_error ;
614
615
}
615
616
if (out_local != NULL ) {
@@ -618,39 +619,46 @@ parse_iso_8601_datetime(char *str, int len,
618
619
bestunit = PANDAS_FR_M ;
619
620
goto finish ;
620
621
}
621
-
622
- if (has_ymd_sep ) {
623
- /* Must have separator, but cannot be trailing */
624
- if (* substr != ymd_sep || sublen == 1 ) {
625
- goto parse_error ;
626
- }
622
+ else if (has_sep && * substr == sep ) {
627
623
++ substr ;
628
624
-- sublen ;
629
625
}
626
+ else if (!isdigit (* substr )) {
627
+ goto parse_error ;
628
+ }
630
629
631
- /* PARSE THE DAY */
632
- /* First digit required */
633
- if (!isdigit (* substr )) {
634
- goto parse_error ;
630
+ /* Can't have a trailing separator */
631
+ if (sublen == 0 ) {
632
+ goto parse_error ;
635
633
}
636
- out -> day = (* substr - '0' );
637
- ++ substr ;
638
- -- sublen ;
639
- /* Second digit optional if there was a separator */
640
- if (isdigit (* substr )) {
641
- out -> day = 10 * out -> day + (* substr - '0' );
634
+
635
+ /* PARSE THE DAY (1 or 2 digits after a separator, otherwise exactly 2) */
636
+ if (has_sep && ((sublen >= 2 && isdigit (substr [0 ]) && !isdigit (substr [1 ]))
637
+ || (sublen == 1 && isdigit (substr [0 ])))) {
638
+ out -> day = (substr [0 ] - '0' );
639
+
640
+ if (out -> day < 1 ) {
641
+ PyErr_Format (PyExc_ValueError ,
642
+ "Day out of range in datetime string \"%s\"" , str );
643
+ goto error ;
644
+ }
642
645
++ substr ;
643
646
-- sublen ;
644
647
}
645
- else if (!has_ymd_sep ) {
646
- goto parse_error ;
648
+ else if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
649
+ out -> day = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
650
+
651
+ if (out -> day < 1 ||
652
+ out -> day > days_per_month_table [year_leap ][out -> month - 1 ]) {
653
+ PyErr_Format (PyExc_ValueError ,
654
+ "Day out of range in datetime string \"%s\"" , str );
655
+ goto error ;
656
+ }
657
+ substr += 2 ;
658
+ sublen -= 2 ;
647
659
}
648
- if (out -> day < 1 ||
649
- out -> day > days_per_month_table [year_leap ][out -> month - 1 ])
650
- {
651
- PyErr_Format (PyExc_ValueError ,
652
- "Day out of range in datetime string \"%s\"" , str );
653
- goto error ;
660
+ else {
661
+ goto parse_error ;
654
662
}
655
663
656
664
/* Next character must be a 'T', ' ', or end of string */
@@ -661,119 +669,104 @@ parse_iso_8601_datetime(char *str, int len,
661
669
bestunit = PANDAS_FR_D ;
662
670
goto finish ;
663
671
}
664
-
665
- if ((* substr != 'T' && * substr != ' ' ) || sublen == 1 ) {
666
- goto parse_error ;
667
- }
668
- ++ substr ;
669
- -- sublen ;
670
-
671
- /* PARSE THE HOURS */
672
- /* First digit required */
673
- if (!isdigit (* substr )) {
672
+ else if (* substr != 'T' && * substr != ' ' ) {
674
673
goto parse_error ;
675
674
}
676
- out -> hour = (* substr - '0' );
677
- ++ substr ;
678
- -- sublen ;
679
- /* Second digit optional */
680
- if (isdigit (* substr )) {
681
- hour_was_2_digits = 1 ;
682
- out -> hour = 10 * out -> hour + (* substr - '0' );
675
+ else {
683
676
++ substr ;
684
677
-- sublen ;
678
+ }
679
+
680
+ /* PARSE THE HOURS (1 or 2 digits) */
681
+ if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
682
+ out -> hour = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
683
+
685
684
if (out -> hour >= 24 ) {
686
685
PyErr_Format (PyExc_ValueError ,
687
686
"Hours out of range in datetime string \"%s\"" , str );
688
687
goto error ;
689
688
}
689
+ substr += 2 ;
690
+ sublen -= 2 ;
690
691
}
691
-
692
- /* Next character must be a ':' or the end of the string */
693
- if (sublen == 0 ) {
694
- if (!hour_was_2_digits ) {
695
- goto parse_error ;
696
- }
697
- bestunit = PANDAS_FR_h ;
698
- goto finish ;
692
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
693
+ out -> hour = substr [0 ] - '0' ;
694
+ ++ substr ;
695
+ -- sublen ;
696
+ }
697
+ else {
698
+ goto parse_error ;
699
699
}
700
700
701
- if ( * substr == ':' ) {
702
- has_hms_sep = 1 ;
701
+ /* Next character must be a ':' or the end of the string */
702
+ if ( sublen > 0 && * substr == ':' ) {
703
703
++ substr ;
704
704
-- sublen ;
705
- /* Cannot have a trailing separator */
706
- if (sublen == 0 || !isdigit (* substr )) {
707
- goto parse_error ;
708
- }
709
705
}
710
- else if (!isdigit (* substr )) {
711
- if (!hour_was_2_digits ) {
712
- goto parse_error ;
713
- }
706
+ else {
714
707
bestunit = PANDAS_FR_h ;
715
708
goto parse_timezone ;
716
709
}
717
710
718
- /* PARSE THE MINUTES */
719
- /* First digit required */
720
- out -> min = (* substr - '0' );
721
- ++ substr ;
722
- -- sublen ;
723
- /* Second digit optional if there was a separator */
724
- if (isdigit (* substr )) {
725
- out -> min = 10 * out -> min + (* substr - '0' );
726
- ++ substr ;
727
- -- sublen ;
711
+ /* Can't have a trailing ':' */
712
+ if (sublen == 0 ) {
713
+ goto parse_error ;
714
+ }
715
+
716
+ /* PARSE THE MINUTES (1 or 2 digits) */
717
+ if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
718
+ out -> min = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
719
+
728
720
if (out -> min >= 60 ) {
729
721
PyErr_Format (PyExc_ValueError ,
730
- "Minutes out of range in datetime string \"%s\"" , str );
722
+ "Minutes out of range in datetime string \"%s\"" , str );
731
723
goto error ;
732
724
}
725
+ substr += 2 ;
726
+ sublen -= 2 ;
733
727
}
734
- else if (!has_hms_sep ) {
735
- goto parse_error ;
728
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
729
+ out -> min = substr [0 ] - '0' ;
730
+ ++ substr ;
731
+ -- sublen ;
736
732
}
737
-
738
- if (sublen == 0 ) {
739
- bestunit = PANDAS_FR_m ;
740
- goto finish ;
733
+ else {
734
+ goto parse_error ;
741
735
}
742
736
743
- /* If we make it through this condition block, then the next
744
- * character is a digit. */
745
- if (has_hms_sep && * substr == ':' ) {
737
+ /* Next character must be a ':' or the end of the string */
738
+ if (sublen > 0 && * substr == ':' ) {
746
739
++ substr ;
747
740
-- sublen ;
748
- /* Cannot have a trailing ':' */
749
- if (sublen == 0 || !isdigit (* substr )) {
750
- goto parse_error ;
751
- }
752
- }
753
- else if (!has_hms_sep && isdigit (* substr )) {
754
741
}
755
742
else {
756
743
bestunit = PANDAS_FR_m ;
757
744
goto parse_timezone ;
758
745
}
759
746
760
- /* PARSE THE SECONDS */
761
- /* First digit required */
762
- out -> sec = (* substr - '0' );
763
- ++ substr ;
764
- -- sublen ;
765
- /* Second digit optional if there was a separator */
766
- if (isdigit (* substr )) {
767
- out -> sec = 10 * out -> sec + (* substr - '0' );
768
- ++ substr ;
769
- -- sublen ;
747
+ /* Can't have a trailing ':' */
748
+ if (sublen == 0 ) {
749
+ goto parse_error ;
750
+ }
751
+
752
+ /* PARSE THE SECONDS (1 or 2 digits) */
753
+ if (sublen >= 2 && isdigit (substr [0 ]) && isdigit (substr [1 ])) {
754
+ out -> sec = 10 * (substr [0 ] - '0' ) + (substr [1 ] - '0' );
755
+
770
756
if (out -> sec >= 60 ) {
771
757
PyErr_Format (PyExc_ValueError ,
772
- "Seconds out of range in datetime string \"%s\"" , str );
758
+ "Seconds out of range in datetime string \"%s\"" , str );
773
759
goto error ;
774
760
}
761
+ substr += 2 ;
762
+ sublen -= 2 ;
763
+ }
764
+ else if (sublen >= 1 && isdigit (substr [0 ])) {
765
+ out -> sec = substr [0 ] - '0' ;
766
+ ++ substr ;
767
+ -- sublen ;
775
768
}
776
- else if (! has_hms_sep ) {
769
+ else {
777
770
goto parse_error ;
778
771
}
779
772
0 commit comments