Skip to content

Commit 55f21ca

Browse files
committed
Revert "ENH: optional ':' separator in ISO8601 strings"
This reverts commit 60b307f.
1 parent 486fcfa commit 55f21ca

File tree

3 files changed

+140
-168
lines changed

3 files changed

+140
-168
lines changed

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1169,7 +1169,7 @@ Bug Fixes
11691169
- Bug in ``.to_csv`` ignoring formatting parameters ``decimal``, ``na_rep``, ``float_format`` for float indexes (:issue:`11553`)
11701170
- Bug in ``Int64Index`` and ``Float64Index`` preventing the use of the modulo operator (:issue:`9244`)
11711171
- Bug in ``MultiIndex.drop`` for not lexsorted multi-indexes (:issue:`12078`)
1172-
- Bug in ``Timestamp`` constructor where microsecond resolution was lost if HHMMSS were not separated with ':' (:issue:`10041`)
1172+
11731173
- Bug in ``DataFrame`` when masking an empty ``DataFrame`` (:issue:`11859`)
11741174

11751175

pandas/src/datetime/np_datetime_strings.c

+136-143
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,6 @@ convert_datetimestruct_local_to_utc(pandas_datetimestruct *out_dts_utc,
355355
* + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow
356356
* + Accepts special values "NaT" (not a time), "Today", (current
357357
* day according to local time) and "Now" (current time in UTC).
358-
* + ':' separator between hours, minutes, and seconds is optional. When
359-
* omitted, each component must be 2 digits if it appears. (GH-10041)
360358
*
361359
* 'str' must be a NULL-terminated string, and 'len' must be its length.
362360
* 'unit' should contain -1 if the unit is unknown, or the unit
@@ -396,21 +394,15 @@ parse_iso_8601_datetime(char *str, int len,
396394
char *substr, sublen;
397395
PANDAS_DATETIMEUNIT bestunit;
398396

399-
/* If year-month-day are separated by a valid separator,
400-
* months/days without leading zeroes will be parsed
397+
/* if date components are separated by one of the valid separators
398+
* months/days without leading 0s will be parsed
401399
* (though not iso8601). If the components aren't separated,
402-
* 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are
403-
* forbidden here (but parsed as YYMMDD elsewhere).
400+
* an error code will be returned because the date is ambiguous
404401
*/
405-
int has_ymd_sep = 0;
406-
char ymd_sep = '\0';
407-
char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '};
408-
int valid_ymd_sep_len = sizeof(valid_ymd_sep);
409-
410-
/* hour-minute-second may or may not separated by ':'. If not, then
411-
* each component must be 2 digits. */
412-
int has_hms_sep = 0;
413-
int hour_was_2_digits = 0;
402+
int has_sep = 0;
403+
char sep = '\0';
404+
char valid_sep[] = {'-', '.', '/', '\\', ' '};
405+
int valid_sep_len = 5;
414406

415407
/* Initialize the output to all zeros */
416408
memset(out, 0, sizeof(pandas_datetimestruct));
@@ -558,58 +550,67 @@ parse_iso_8601_datetime(char *str, int len,
558550
/* Check whether it's a leap-year */
559551
year_leap = is_leapyear(out->year);
560552

561-
/* Next character must be a separator, start of month, or end of string */
553+
/* Next character must be a separator, start of month or end */
562554
if (sublen == 0) {
563555
if (out_local != NULL) {
564556
*out_local = 0;
565557
}
566558
bestunit = PANDAS_FR_Y;
567559
goto finish;
568560
}
569-
570-
if (!isdigit(*substr)) {
571-
for (i = 0; i < valid_ymd_sep_len; ++i) {
572-
if (*substr == valid_ymd_sep[i]) {
561+
else if (!isdigit(*substr)) {
562+
for (i = 0; i < valid_sep_len; ++i) {
563+
if (*substr == valid_sep[i]) {
564+
has_sep = 1;
565+
sep = valid_sep[i];
566+
++substr;
567+
--sublen;
573568
break;
574569
}
575570
}
576-
if (i == valid_ymd_sep_len) {
577-
goto parse_error;
578-
}
579-
has_ymd_sep = 1;
580-
ymd_sep = valid_ymd_sep[i];
581-
++substr;
582-
--sublen;
583-
/* Cannot have trailing separator */
584-
if (sublen == 0 || !isdigit(*substr)) {
571+
if (i == valid_sep_len) {
585572
goto parse_error;
586573
}
587574
}
588575

589-
/* PARSE THE MONTH */
590-
/* First digit required */
591-
out->month = (*substr - '0');
592-
++substr;
593-
--sublen;
594-
/* Second digit optional if there was a separator */
595-
if (isdigit(*substr)) {
596-
out->month = 10 * out->month + (*substr - '0');
576+
/* Can't have a trailing sep */
577+
if (sublen == 0) {
578+
goto parse_error;
579+
}
580+
581+
582+
/* PARSE THE MONTH (2 digits) */
583+
if (has_sep && ((sublen >= 2 && isdigit(substr[0]) && !isdigit(substr[1]))
584+
|| (sublen == 1 && isdigit(substr[0])))) {
585+
out->month = (substr[0] - '0');
586+
587+
if (out->month < 1) {
588+
PyErr_Format(PyExc_ValueError,
589+
"Month out of range in datetime string \"%s\"", str);
590+
goto error;
591+
}
597592
++substr;
598593
--sublen;
599594
}
600-
else if (!has_ymd_sep) {
601-
goto parse_error;
595+
else if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
596+
out->month = 10 * (substr[0] - '0') + (substr[1] - '0');
597+
598+
if (out->month < 1 || out->month > 12) {
599+
PyErr_Format(PyExc_ValueError,
600+
"Month out of range in datetime string \"%s\"", str);
601+
goto error;
602+
}
603+
substr += 2;
604+
sublen -= 2;
602605
}
603-
if (out->month < 1 || out->month > 12) {
604-
PyErr_Format(PyExc_ValueError,
605-
"Month out of range in datetime string \"%s\"", str);
606-
goto error;
606+
else {
607+
goto parse_error;
607608
}
608609

609-
/* Next character must be the separator, start of day, or end of string */
610+
/* Next character must be a '-' or the end of the string */
610611
if (sublen == 0) {
611-
/* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
612-
if (!has_ymd_sep) {
612+
/* dates of form YYYYMM are not valid */
613+
if (!has_sep) {
613614
goto parse_error;
614615
}
615616
if (out_local != NULL) {
@@ -618,39 +619,46 @@ parse_iso_8601_datetime(char *str, int len,
618619
bestunit = PANDAS_FR_M;
619620
goto finish;
620621
}
621-
622-
if (has_ymd_sep) {
623-
/* Must have separator, but cannot be trailing */
624-
if (*substr != ymd_sep || sublen == 1) {
625-
goto parse_error;
626-
}
622+
else if (has_sep && *substr == sep) {
627623
++substr;
628624
--sublen;
629625
}
626+
else if (!isdigit(*substr)) {
627+
goto parse_error;
628+
}
630629

631-
/* PARSE THE DAY */
632-
/* First digit required */
633-
if (!isdigit(*substr)) {
634-
goto parse_error;
630+
/* Can't have a trailing '-' */
631+
if (sublen == 0) {
632+
goto parse_error;
635633
}
636-
out->day = (*substr - '0');
637-
++substr;
638-
--sublen;
639-
/* Second digit optional if there was a separator */
640-
if (isdigit(*substr)) {
641-
out->day = 10 * out->day + (*substr - '0');
634+
635+
/* PARSE THE DAY (2 digits) */
636+
if (has_sep && ((sublen >= 2 && isdigit(substr[0]) && !isdigit(substr[1]))
637+
|| (sublen == 1 && isdigit(substr[0])))) {
638+
out->day = (substr[0] - '0');
639+
640+
if (out->day < 1) {
641+
PyErr_Format(PyExc_ValueError,
642+
"Day out of range in datetime string \"%s\"", str);
643+
goto error;
644+
}
642645
++substr;
643646
--sublen;
644647
}
645-
else if (!has_ymd_sep) {
646-
goto parse_error;
648+
else if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
649+
out->day = 10 * (substr[0] - '0') + (substr[1] - '0');
650+
651+
if (out->day < 1 ||
652+
out->day > days_per_month_table[year_leap][out->month-1]) {
653+
PyErr_Format(PyExc_ValueError,
654+
"Day out of range in datetime string \"%s\"", str);
655+
goto error;
656+
}
657+
substr += 2;
658+
sublen -= 2;
647659
}
648-
if (out->day < 1 ||
649-
out->day > days_per_month_table[year_leap][out->month-1])
650-
{
651-
PyErr_Format(PyExc_ValueError,
652-
"Day out of range in datetime string \"%s\"", str);
653-
goto error;
660+
else {
661+
goto parse_error;
654662
}
655663

656664
/* Next character must be a 'T', ' ', or end of string */
@@ -661,119 +669,104 @@ parse_iso_8601_datetime(char *str, int len,
661669
bestunit = PANDAS_FR_D;
662670
goto finish;
663671
}
664-
665-
if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
666-
goto parse_error;
667-
}
668-
++substr;
669-
--sublen;
670-
671-
/* PARSE THE HOURS */
672-
/* First digit required */
673-
if (!isdigit(*substr)) {
672+
else if (*substr != 'T' && *substr != ' ') {
674673
goto parse_error;
675674
}
676-
out->hour = (*substr - '0');
677-
++substr;
678-
--sublen;
679-
/* Second digit optional */
680-
if (isdigit(*substr)) {
681-
hour_was_2_digits = 1;
682-
out->hour = 10 * out->hour + (*substr - '0');
675+
else {
683676
++substr;
684677
--sublen;
678+
}
679+
680+
/* PARSE THE HOURS (2 digits) */
681+
if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
682+
out->hour = 10 * (substr[0] - '0') + (substr[1] - '0');
683+
685684
if (out->hour >= 24) {
686685
PyErr_Format(PyExc_ValueError,
687686
"Hours out of range in datetime string \"%s\"", str);
688687
goto error;
689688
}
689+
substr += 2;
690+
sublen -= 2;
690691
}
691-
692-
/* Next character must be a ':' or the end of the string */
693-
if (sublen == 0) {
694-
if (!hour_was_2_digits) {
695-
goto parse_error;
696-
}
697-
bestunit = PANDAS_FR_h;
698-
goto finish;
692+
else if (sublen >= 1 && isdigit(substr[0])) {
693+
out->hour = substr[0] - '0';
694+
++substr;
695+
--sublen;
696+
}
697+
else {
698+
goto parse_error;
699699
}
700700

701-
if (*substr == ':') {
702-
has_hms_sep = 1;
701+
/* Next character must be a ':' or the end of the string */
702+
if (sublen > 0 && *substr == ':') {
703703
++substr;
704704
--sublen;
705-
/* Cannot have a trailing separator */
706-
if (sublen == 0 || !isdigit(*substr)) {
707-
goto parse_error;
708-
}
709705
}
710-
else if (!isdigit(*substr)) {
711-
if (!hour_was_2_digits) {
712-
goto parse_error;
713-
}
706+
else {
714707
bestunit = PANDAS_FR_h;
715708
goto parse_timezone;
716709
}
717710

718-
/* PARSE THE MINUTES */
719-
/* First digit required */
720-
out->min = (*substr - '0');
721-
++substr;
722-
--sublen;
723-
/* Second digit optional if there was a separator */
724-
if (isdigit(*substr)) {
725-
out->min = 10 * out->min + (*substr - '0');
726-
++substr;
727-
--sublen;
711+
/* Can't have a trailing ':' */
712+
if (sublen == 0) {
713+
goto parse_error;
714+
}
715+
716+
/* PARSE THE MINUTES (2 digits) */
717+
if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
718+
out->min = 10 * (substr[0] - '0') + (substr[1] - '0');
719+
728720
if (out->min >= 60) {
729721
PyErr_Format(PyExc_ValueError,
730-
"Minutes out of range in datetime string \"%s\"", str);
722+
"Minutes out of range in datetime string \"%s\"", str);
731723
goto error;
732724
}
725+
substr += 2;
726+
sublen -= 2;
733727
}
734-
else if (!has_hms_sep) {
735-
goto parse_error;
728+
else if (sublen >= 1 && isdigit(substr[0])) {
729+
out->min = substr[0] - '0';
730+
++substr;
731+
--sublen;
736732
}
737-
738-
if (sublen == 0) {
739-
bestunit = PANDAS_FR_m;
740-
goto finish;
733+
else {
734+
goto parse_error;
741735
}
742736

743-
/* If we make it through this condition block, then the next
744-
* character is a digit. */
745-
if (has_hms_sep && *substr == ':') {
737+
/* Next character must be a ':' or the end of the string */
738+
if (sublen > 0 && *substr == ':') {
746739
++substr;
747740
--sublen;
748-
/* Cannot have a trailing ':' */
749-
if (sublen == 0 || !isdigit(*substr)) {
750-
goto parse_error;
751-
}
752-
}
753-
else if (!has_hms_sep && isdigit(*substr)) {
754741
}
755742
else {
756743
bestunit = PANDAS_FR_m;
757744
goto parse_timezone;
758745
}
759746

760-
/* PARSE THE SECONDS */
761-
/* First digit required */
762-
out->sec = (*substr - '0');
763-
++substr;
764-
--sublen;
765-
/* Second digit optional if there was a separator */
766-
if (isdigit(*substr)) {
767-
out->sec = 10 * out->sec + (*substr - '0');
768-
++substr;
769-
--sublen;
747+
/* Can't have a trailing ':' */
748+
if (sublen == 0) {
749+
goto parse_error;
750+
}
751+
752+
/* PARSE THE SECONDS (2 digits) */
753+
if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) {
754+
out->sec = 10 * (substr[0] - '0') + (substr[1] - '0');
755+
770756
if (out->sec >= 60) {
771757
PyErr_Format(PyExc_ValueError,
772-
"Seconds out of range in datetime string \"%s\"", str);
758+
"Seconds out of range in datetime string \"%s\"", str);
773759
goto error;
774760
}
761+
substr += 2;
762+
sublen -= 2;
763+
}
764+
else if (sublen >= 1 && isdigit(substr[0])) {
765+
out->sec = substr[0] - '0';
766+
++substr;
767+
--sublen;
775768
}
776-
else if (!has_hms_sep) {
769+
else {
777770
goto parse_error;
778771
}
779772

0 commit comments

Comments
 (0)