Skip to content

Commit 54a65e0

Browse files
authored
REF: separate out _parse_with_format (#55612)
1 parent b446c4b commit 54a65e0

File tree

1 file changed

+188
-172
lines changed

1 file changed

+188
-172
lines changed

pandas/_libs/tslibs/strptime.pyx

+188-172
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,7 @@ def array_strptime(
179179
npy_datetimestruct dts
180180
int64_t[::1] iresult
181181
object[::1] result_timezone
182-
int year, month, day, minute, hour, second, weekday, julian
183-
int week_of_year, week_of_year_start, parse_code, ordinal
184-
int iso_week, iso_year
185-
int64_t us, ns
186-
object val, group_key, ampm, found, tz
182+
object val, tz
187183
bint is_raise = errors=="raise"
188184
bint is_ignore = errors=="ignore"
189185
bint is_coerce = errors=="coerce"
@@ -351,173 +347,9 @@ def array_strptime(
351347
if not string_to_dts_succeeded and fmt == "ISO8601":
352348
raise ValueError(f"Time data {val} is not ISO8601 format")
353349

354-
# exact matching
355-
if exact:
356-
found = format_regex.match(val)
357-
if not found:
358-
raise ValueError(
359-
f"time data \"{val}\" doesn't match format \"{fmt}\""
360-
)
361-
if len(val) != found.end():
362-
raise ValueError(
363-
"unconverted data remains when parsing with "
364-
f"format \"{fmt}\": \"{val[found.end():]}\""
365-
)
366-
367-
# search
368-
else:
369-
found = format_regex.search(val)
370-
if not found:
371-
raise ValueError(
372-
f"time data \"{val}\" doesn't match format \"{fmt}\""
373-
)
374-
375-
iso_year = -1
376-
year = 1900
377-
month = day = 1
378-
hour = minute = second = ns = us = 0
379-
tz = None
380-
# Default to -1 to signify that values not known; not critical to have,
381-
# though
382-
iso_week = week_of_year = -1
383-
week_of_year_start = -1
384-
# weekday and julian defaulted to -1 so as to signal need to calculate
385-
# values
386-
weekday = julian = -1
387-
found_dict = found.groupdict()
388-
for group_key in found_dict.iterkeys():
389-
# Directives not explicitly handled below:
390-
# c, x, X
391-
# handled by making out of other directives
392-
# U, W
393-
# worthless without day of the week
394-
parse_code = _parse_code_table[group_key]
395-
396-
if parse_code == 0:
397-
year = int(found_dict["y"])
398-
# Open Group specification for strptime() states that a %y
399-
# value in the range of [00, 68] is in the century 2000, while
400-
# [69,99] is in the century 1900
401-
if year <= 68:
402-
year += 2000
403-
else:
404-
year += 1900
405-
elif parse_code == 1:
406-
year = int(found_dict["Y"])
407-
elif parse_code == 2:
408-
month = int(found_dict["m"])
409-
# elif group_key == 'B':
410-
elif parse_code == 3:
411-
month = locale_time.f_month.index(found_dict["B"].lower())
412-
# elif group_key == 'b':
413-
elif parse_code == 4:
414-
month = locale_time.a_month.index(found_dict["b"].lower())
415-
# elif group_key == 'd':
416-
elif parse_code == 5:
417-
day = int(found_dict["d"])
418-
# elif group_key == 'H':
419-
elif parse_code == 6:
420-
hour = int(found_dict["H"])
421-
elif parse_code == 7:
422-
hour = int(found_dict["I"])
423-
ampm = found_dict.get("p", "").lower()
424-
# If there was no AM/PM indicator, we'll treat this like AM
425-
if ampm in ("", locale_time.am_pm[0]):
426-
# We're in AM so the hour is correct unless we're
427-
# looking at 12 midnight.
428-
# 12 midnight == 12 AM == hour 0
429-
if hour == 12:
430-
hour = 0
431-
elif ampm == locale_time.am_pm[1]:
432-
# We're in PM so we need to add 12 to the hour unless
433-
# we're looking at 12 noon.
434-
# 12 noon == 12 PM == hour 12
435-
if hour != 12:
436-
hour += 12
437-
elif parse_code == 8:
438-
minute = int(found_dict["M"])
439-
elif parse_code == 9:
440-
second = int(found_dict["S"])
441-
elif parse_code == 10:
442-
s = found_dict["f"]
443-
# Pad to always return nanoseconds
444-
s += "0" * (9 - len(s))
445-
us = long(s)
446-
ns = us % 1000
447-
us = us // 1000
448-
elif parse_code == 11:
449-
weekday = locale_time.f_weekday.index(found_dict["A"].lower())
450-
elif parse_code == 12:
451-
weekday = locale_time.a_weekday.index(found_dict["a"].lower())
452-
elif parse_code == 13:
453-
weekday = int(found_dict["w"])
454-
if weekday == 0:
455-
weekday = 6
456-
else:
457-
weekday -= 1
458-
elif parse_code == 14:
459-
julian = int(found_dict["j"])
460-
elif parse_code == 15 or parse_code == 16:
461-
week_of_year = int(found_dict[group_key])
462-
if group_key == "U":
463-
# U starts week on Sunday.
464-
week_of_year_start = 6
465-
else:
466-
# W starts week on Monday.
467-
week_of_year_start = 0
468-
elif parse_code == 17:
469-
tz = pytz.timezone(found_dict["Z"])
470-
elif parse_code == 19:
471-
tz = parse_timezone_directive(found_dict["z"])
472-
elif parse_code == 20:
473-
iso_year = int(found_dict["G"])
474-
elif parse_code == 21:
475-
iso_week = int(found_dict["V"])
476-
elif parse_code == 22:
477-
weekday = int(found_dict["u"])
478-
weekday -= 1
479-
480-
# If we know the wk of the year and what day of that wk, we can figure
481-
# out the Julian day of the year.
482-
if julian == -1 and weekday != -1:
483-
if week_of_year != -1:
484-
week_starts_Mon = week_of_year_start == 0
485-
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
486-
week_starts_Mon)
487-
elif iso_year != -1 and iso_week != -1:
488-
year, julian = _calc_julian_from_V(iso_year, iso_week,
489-
weekday + 1)
490-
# Cannot pre-calculate date() since can change in Julian
491-
# calculation and thus could have different value for the day of the wk
492-
# calculation.
493-
if julian == -1:
494-
# Need to add 1 to result since first day of the year is 1, not
495-
# 0.
496-
ordinal = date(year, month, day).toordinal()
497-
julian = ordinal - date(year, 1, 1).toordinal() + 1
498-
else:
499-
# Assume that if they bothered to include Julian day it will
500-
# be accurate.
501-
datetime_result = date.fromordinal(
502-
(julian - 1) + date(year, 1, 1).toordinal())
503-
year = datetime_result.year
504-
month = datetime_result.month
505-
day = datetime_result.day
506-
if weekday == -1:
507-
weekday = date(year, month, day).weekday()
508-
509-
dts.year = year
510-
dts.month = month
511-
dts.day = day
512-
dts.hour = hour
513-
dts.min = minute
514-
dts.sec = second
515-
dts.us = us
516-
dts.ps = ns * 1000
517-
518-
iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
519-
check_dts_bounds(&dts)
520-
350+
tz = _parse_with_format(
351+
val, fmt, exact, format_regex, locale_time, &iresult[i]
352+
)
521353
result_timezone[i] = tz
522354

523355
except (ValueError, OutOfBoundsDatetime) as ex:
@@ -540,6 +372,190 @@ def array_strptime(
540372
return result, result_timezone.base
541373

542374

375+
cdef tzinfo _parse_with_format(
    str val, str fmt, bint exact, format_regex, locale_time, int64_t* iresult
):
    """
    Parse one string against a strptime-style format and store the result.

    Parameters
    ----------
    val : str
        The string to parse.
    fmt : str
        The format string. It is only used here to build error messages;
        the matching itself is done by ``format_regex``.
    exact : bint
        If True, ``format_regex.match`` is used and the match must consume
        all of ``val``. If False, ``format_regex.search`` is used and any
        text around the match is ignored.
    format_regex : object
        Object exposing ``match`` and ``search`` (presumably the compiled
        regex built from ``fmt`` by the caller). Every named group of a
        match must be a key of ``_parse_code_table``.
    locale_time : object
        Provides the ``f_month``, ``a_month``, ``f_weekday``, ``a_weekday``
        and ``am_pm`` sequences used to resolve textual directives.
    iresult : int64_t*
        Output location. ``iresult[0]`` receives the value computed by
        ``npy_datetimestruct_to_datetime(NPY_FR_ns, ...)``.

    Returns
    -------
    tzinfo or None
        The timezone parsed from a ``Z`` or ``z`` group, or None when the
        match contains neither.

    Raises
    ------
    ValueError
        If ``val`` does not match, or if ``exact`` is True and unconverted
        text remains after the match.

    Notes
    -----
    Exceptions raised by the helpers called here (``int``, ``date``,
    ``check_dts_bounds``, ...) propagate unchanged. ``iresult[0]`` is
    written *before* ``check_dts_bounds`` runs, so it has already been
    assigned when that check raises.
    """
    cdef:
        npy_datetimestruct dts
        int year, month, day, minute, hour, second, weekday, julian
        int week_of_year, week_of_year_start, parse_code, ordinal
        int iso_week, iso_year
        int64_t us, ns
        object found
        tzinfo tz
        dict found_dict
        str group_key, ampm

    if exact:
        # exact matching
        found = format_regex.match(val)
        if not found:
            raise ValueError(
                f"time data \"{val}\" doesn't match format \"{fmt}\""
            )
        if len(val) != found.end():
            raise ValueError(
                "unconverted data remains when parsing with "
                f"format \"{fmt}\": \"{val[found.end():]}\""
            )

    else:
        # search
        found = format_regex.search(val)
        if not found:
            raise ValueError(
                f"time data \"{val}\" doesn't match format \"{fmt}\""
            )

    iso_year = -1
    year = 1900
    month = day = 1
    hour = minute = second = ns = us = 0
    tz = None
    # Default to -1 to signify that values not known; not critical to have,
    # though
    iso_week = week_of_year = -1
    week_of_year_start = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate
    # values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        parse_code = _parse_code_table[group_key]

        if parse_code == 0:
            # %y
            year = int(found_dict["y"])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif parse_code == 1:
            # %Y
            year = int(found_dict["Y"])
        elif parse_code == 2:
            # %m
            month = int(found_dict["m"])
        elif parse_code == 3:
            # %B: position of the name within locale_time.f_month
            month = locale_time.f_month.index(found_dict["B"].lower())
        elif parse_code == 4:
            # %b: position of the name within locale_time.a_month
            month = locale_time.a_month.index(found_dict["b"].lower())
        elif parse_code == 5:
            # %d
            day = int(found_dict["d"])
        elif parse_code == 6:
            # %H
            hour = int(found_dict["H"])
        elif parse_code == 7:
            # %I, adjusted by %p when that group is present
            hour = int(found_dict["I"])
            ampm = found_dict.get("p", "").lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ("", locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif parse_code == 8:
            # %M
            minute = int(found_dict["M"])
        elif parse_code == 9:
            # %S
            second = int(found_dict["S"])
        elif parse_code == 10:
            # %f
            s = found_dict["f"]
            # Pad to always return nanoseconds
            s += "0" * (9 - len(s))
            us = int(s)
            # Split the 9-digit value into whole microseconds and the
            # remaining nanoseconds.
            ns = us % 1000
            us = us // 1000
        elif parse_code == 11:
            # %A: position of the name within locale_time.f_weekday
            weekday = locale_time.f_weekday.index(found_dict["A"].lower())
        elif parse_code == 12:
            # %a: position of the name within locale_time.a_weekday
            weekday = locale_time.a_weekday.index(found_dict["a"].lower())
        elif parse_code == 13:
            # %w: remap so that 0 (first value of %w) becomes 6 and every
            # other value shifts down by one.
            weekday = int(found_dict["w"])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif parse_code == 14:
            # %j
            julian = int(found_dict["j"])
        elif parse_code == 15 or parse_code == 16:
            # %U / %W
            week_of_year = int(found_dict[group_key])
            if group_key == "U":
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif parse_code == 17:
            # %Z
            tz = pytz.timezone(found_dict["Z"])
        elif parse_code == 19:
            # %z
            tz = parse_timezone_directive(found_dict["z"])
        elif parse_code == 20:
            # %G
            iso_year = int(found_dict["G"])
        elif parse_code == 21:
            # %V
            iso_week = int(found_dict["V"])
        elif parse_code == 22:
            # %u: shift down by one to match the 0-based weekday used here
            weekday = int(found_dict["u"])
            weekday -= 1

    # If we know the wk of the year and what day of that wk, we can figure
    # out the Julian day of the year.
    if julian == -1 and weekday != -1:
        if week_of_year != -1:
            week_starts_Mon = week_of_year_start == 0
            julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                              week_starts_Mon)
        elif iso_year != -1 and iso_week != -1:
            year, julian = _calc_julian_from_V(iso_year, iso_week,
                                               weekday + 1)
    # Cannot pre-calculate date() since can change in Julian
    # calculation and thus could have different value for the day of the wk
    # calculation.
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not
        # 0.
        # NOTE: neither value is read again below; building the date()
        # here also rejects impossible year/month/day combinations
        # (datetime.date raises ValueError for those).
        ordinal = date(year, month, day).toordinal()
        julian = ordinal - date(year, 1, 1).toordinal() + 1
    else:
        # Assume that if they bothered to include Julian day it will
        # be accurate.
        datetime_result = date.fromordinal(
            (julian - 1) + date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = date(year, month, day).weekday()

    dts.year = year
    dts.month = month
    dts.day = day
    dts.hour = hour
    dts.min = minute
    dts.sec = second
    dts.us = us
    dts.ps = ns * 1000

    # The output slot is written first and the bounds are checked second,
    # so the slot is already assigned if check_dts_bounds raises.
    iresult[0] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
    check_dts_bounds(&dts)

    return tz
557+
558+
543559
class TimeRE(_TimeRE):
544560
"""
545561
Handle conversion from format directives to regexes.

0 commit comments

Comments
 (0)