@@ -80,6 +80,7 @@ from pandas._libs.tslibs.timestamps import Timestamp
80
80
81
81
cnp.import_array()
82
82
83
+
83
84
cdef bint format_is_iso(f: str ):
84
85
"""
85
86
Does format match the iso8601 set that can be handled by the C parser?
@@ -154,6 +155,77 @@ cdef dict _parse_code_table = {"y": 0,
154
155
" u" : 22 }
155
156
156
157
158
cdef _validate_fmt(str fmt):
    """
    Reject strptime format strings whose directives cannot be combined.

    Parameters
    ----------
    fmt : str
        strptime-style format string to check.

    Raises
    ------
    ValueError
        If ``fmt`` uses ``%W``/``%U`` without a year and a weekday directive,
        contains both ``%Z`` and ``%z``, mixes ``%j`` with ``%G``, uses ``%G``
        or ``%V`` without its ISO counterpart and a weekday directive, or
        mixes ``%V`` with ``%Y``.
    """
    # Weekday directives accepted next to %W/%U; the ISO checks additionally
    # accept the ISO weekday number %u.
    has_weekday = "%A" in fmt or "%a" in fmt or "%w" in fmt
    has_iso_weekday = has_weekday or "%u" in fmt

    # NOTE(review): this is a single if/elif chain, so once %W or %U is
    # present none of the later checks run, even when the %W/%U check passes.
    # Kept as-is to preserve behavior; confirm whether that is intended.
    if "%W" in fmt or "%U" in fmt:
        if "%Y" not in fmt and "%y" not in fmt:
            raise ValueError("Cannot use '%W' or '%U' without day and year")
        if not has_weekday:
            raise ValueError("Cannot use '%W' or '%U' without day and year")
    elif "%Z" in fmt and "%z" in fmt:
        raise ValueError("Cannot parse both %Z and %z")
    elif "%j" in fmt and "%G" in fmt:
        raise ValueError("Day of the year directive '%j' is not "
                         "compatible with ISO year directive '%G'. "
                         "Use '%Y' instead.")
    elif "%G" in fmt and ("%V" not in fmt or not has_iso_weekday):
        raise ValueError("ISO year directive '%G' must be used with "
                         "the ISO week directive '%V' and a weekday "
                         "directive '%A', '%a', '%w', or '%u'.")
    elif "%V" in fmt and "%Y" in fmt:
        raise ValueError("ISO week directive '%V' is incompatible with "
                         "the year directive '%Y'. Use the ISO year "
                         "'%G' instead.")
    elif "%V" in fmt and ("%G" not in fmt or not has_iso_weekday):
        raise ValueError("ISO week directive '%V' must be used with "
                         "the ISO year directive '%G' and a weekday "
                         "directive '%A', '%a', '%w', or '%u'.")
198
+
199
+
200
cdef _get_format_regex(str fmt):
    """
    Return the compiled regex for ``fmt`` together with the locale data.

    Compiled patterns are kept in the module-level ``_regex_cache``; all
    access to the cache happens while holding ``_cache_lock``.

    Parameters
    ----------
    fmt : str
        strptime-style format string.

    Returns
    -------
    tuple
        ``(format_regex, locale_time)`` where ``format_regex`` is the result
        of ``_TimeRE_cache.compile(fmt)`` and ``locale_time`` is
        ``_TimeRE_cache.locale_time``.

    Raises
    ------
    ValueError
        If ``fmt`` contains a bad directive or a stray ``%``.
    """
    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        if _getlang() != _TimeRE_cache.locale_time.lang:
            # The language changed since the cache was built, so both the
            # TimeRE instance and every cached pattern are discarded.
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(fmt)
        if format_regex is None:
            try:
                format_regex = _TimeRE_cache.compile(fmt)
            except KeyError as err:
                # KeyError raised when a bad format is found; can be specified
                # as \\, in which case it was a stray % but with a space after
                # it
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                raise ValueError(f"'{bad_directive}' is a bad directive "
                                 f"in format '{fmt}'")
            except IndexError:
                # IndexError only occurs when the format string is "%"
                raise ValueError(f"stray % in format '{fmt}'")
            _regex_cache[fmt] = format_regex
    return format_regex, locale_time
227
+
228
+
157
229
cdef class DatetimeParseState:
158
230
def __cinit__ (self ):
159
231
self .found_tz = False
@@ -221,71 +293,8 @@ def array_strptime(
221
293
222
294
assert is_raise or is_ignore or is_coerce
223
295
224
- if " %W " in fmt or " %U " in fmt:
225
- if " %Y " not in fmt and " %y " not in fmt:
226
- raise ValueError (" Cannot use '%W ' or '%U ' without day and year" )
227
- if " %A " not in fmt and " %a " not in fmt and " %w " not in fmt:
228
- raise ValueError (" Cannot use '%W ' or '%U ' without day and year" )
229
- elif " %Z " in fmt and " %z " in fmt:
230
- raise ValueError (" Cannot parse both %Z and %z " )
231
- elif " %j " in fmt and " %G " in fmt:
232
- raise ValueError (" Day of the year directive '%j ' is not "
233
- " compatible with ISO year directive '%G '. "
234
- " Use '%Y ' instead." )
235
- elif " %G " in fmt and (
236
- " %V " not in fmt
237
- or not (
238
- " %A " in fmt
239
- or " %a " in fmt
240
- or " %w " in fmt
241
- or " %u " in fmt
242
- )
243
- ):
244
- raise ValueError (" ISO year directive '%G ' must be used with "
245
- " the ISO week directive '%V ' and a weekday "
246
- " directive '%A ', '%a ', '%w ', or '%u '." )
247
- elif " %V " in fmt and " %Y " in fmt:
248
- raise ValueError (" ISO week directive '%V ' is incompatible with "
249
- " the year directive '%Y '. Use the ISO year "
250
- " '%G ' instead." )
251
- elif " %V " in fmt and (
252
- " %G " not in fmt
253
- or not (
254
- " %A " in fmt
255
- or " %a " in fmt
256
- or " %w " in fmt
257
- or " %u " in fmt
258
- )
259
- ):
260
- raise ValueError (" ISO week directive '%V ' must be used with "
261
- " the ISO year directive '%G ' and a weekday "
262
- " directive '%A ', '%a ', '%w ', or '%u '." )
263
-
264
- global _TimeRE_cache, _regex_cache
265
- with _cache_lock:
266
- if _getlang() != _TimeRE_cache.locale_time.lang:
267
- _TimeRE_cache = TimeRE()
268
- _regex_cache.clear()
269
- if len (_regex_cache) > _CACHE_MAX_SIZE:
270
- _regex_cache.clear()
271
- locale_time = _TimeRE_cache.locale_time
272
- format_regex = _regex_cache.get(fmt)
273
- if not format_regex:
274
- try :
275
- format_regex = _TimeRE_cache.compile(fmt)
276
- # KeyError raised when a bad format is found; can be specified as
277
- # \\, in which case it was a stray % but with a space after it
278
- except KeyError , err:
279
- bad_directive = err.args[0 ]
280
- if bad_directive == " \\ " :
281
- bad_directive = " %"
282
- del err
283
- raise ValueError (f" '{bad_directive}' is a bad directive "
284
- f" in format '{fmt}'" )
285
- # IndexError only occurs when the format string is "%"
286
- except IndexError :
287
- raise ValueError (f" stray % i n format '{fmt}'" )
288
- _regex_cache[fmt] = format_regex
296
+ _validate_fmt(fmt)
297
+ format_regex, locale_time = _get_format_regex(fmt)
289
298
290
299
result = np.empty(n, dtype = " M8[ns]" )
291
300
iresult = result.view(" i8" )
@@ -366,8 +375,10 @@ def array_strptime(
366
375
raise ValueError (f" Time data {val} is not ISO8601 format" )
367
376
368
377
tz = _parse_with_format(
369
- val, fmt, exact, format_regex, locale_time, & iresult[i]
378
+ val, fmt, exact, format_regex, locale_time, & dts
370
379
)
380
+ iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, & dts)
381
+ check_dts_bounds(& dts)
371
382
result_timezone[i] = tz
372
383
373
384
except (ValueError , OutOfBoundsDatetime) as ex:
@@ -391,10 +402,10 @@ def array_strptime(
391
402
392
403
393
404
cdef tzinfo _parse_with_format(
394
- str val, str fmt, bint exact, format_regex, locale_time, int64_t * iresult
405
+ str val, str fmt, bint exact, format_regex, locale_time, npy_datetimestruct * dts
395
406
):
407
+ # Based on https://github.com/python/cpython/blob/main/Lib/_strptime.py#L293
396
408
cdef:
397
- npy_datetimestruct dts
398
409
int year, month, day, minute, hour, second, weekday, julian
399
410
int week_of_year, week_of_year_start, parse_code, ordinal
400
411
int iso_week, iso_year
@@ -452,24 +463,32 @@ cdef tzinfo _parse_with_format(
452
463
# value in the range of [00, 68] is in the century 2000, while
453
464
# [69,99] is in the century 1900
454
465
if year <= 68 :
466
+ # e.g. val='May 04'; fmt='%b %y'
455
467
year += 2000
456
468
else :
457
469
year += 1900
470
+ # TODO: not reached in tests 2023-10-28
458
471
elif parse_code == 1 :
472
+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
459
473
year = int (found_dict[" Y" ])
460
474
elif parse_code == 2 :
475
+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
461
476
month = int (found_dict[" m" ])
462
477
# elif group_key == 'B':
463
478
elif parse_code == 3 :
479
+ # e.g. val='30/December/2011'; fmt='%d/%B/%Y'
464
480
month = locale_time.f_month.index(found_dict[" B" ].lower())
465
481
# elif group_key == 'b':
466
482
elif parse_code == 4 :
483
+ # e.g. val='30/Dec/2011 00:00:00'; fmt='%d/%b/%Y %H:%M:%S'
467
484
month = locale_time.a_month.index(found_dict[" b" ].lower())
468
485
# elif group_key == 'd':
469
486
elif parse_code == 5 :
487
+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
470
488
day = int (found_dict[" d" ])
471
489
# elif group_key == 'H':
472
490
elif parse_code == 6 :
491
+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
473
492
hour = int (found_dict[" H" ])
474
493
elif parse_code == 7 :
475
494
hour = int (found_dict[" I" ])
@@ -481,71 +500,101 @@ cdef tzinfo _parse_with_format(
481
500
# 12 midnight == 12 AM == hour 0
482
501
if hour == 12 :
483
502
hour = 0
503
+ # TODO: not reached in tests 2023-10-28; the implicit `else`
504
+ # branch is tested with e.g.
505
+ # val='Tuesday 24 Aug 2021 01:30:48 AM'
506
+ # fmt='%A %d %b %Y %I:%M:%S %p'
484
507
elif ampm == locale_time.am_pm[1 ]:
485
508
# We're in PM so we need to add 12 to the hour unless
486
509
# we're looking at 12 noon.
487
510
# 12 noon == 12 PM == hour 12
488
511
if hour != 12 :
512
+ # e.g. val='01/10/2010 08:14 PM'; fmt='%m/%d/%Y %I:%M %p'
489
513
hour += 12
514
+ # TODO: the implicit `else` branch is not tested 2023-10-28
515
+ # TODO: the implicit `else` branch is not reached 2023-10-28; possible?
490
516
elif parse_code == 8 :
517
+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
491
518
minute = int (found_dict[" M" ])
492
519
elif parse_code == 9 :
520
+ # e.g. val='17-10-2010 07:15:30'; fmt='%d-%m-%Y %H:%M:%S'
493
521
second = int (found_dict[" S" ])
494
522
elif parse_code == 10 :
523
+ # e.g. val='10:10:10.100'; fmt='%H:%M:%S.%f'
495
524
s = found_dict[" f" ]
496
525
# Pad to always return nanoseconds
497
526
s += " 0" * (9 - len (s))
498
527
us = long (s)
499
528
ns = us % 1000
500
529
us = us // 1000
501
530
elif parse_code == 11 :
531
+ # e.g val='Tuesday 24 Aug 2021 01:30:48 AM'; fmt='%A %d %b %Y %I:%M:%S %p'
502
532
weekday = locale_time.f_weekday.index(found_dict[" A" ].lower())
503
533
elif parse_code == 12 :
534
+ # e.g. val='Tue 24 Aug 2021 01:30:48 AM'; fmt='%a %d %b %Y %I:%M:%S %p'
504
535
weekday = locale_time.a_weekday.index(found_dict[" a" ].lower())
505
536
elif parse_code == 13 :
506
537
weekday = int (found_dict[" w" ])
507
538
if weekday == 0 :
539
+ # e.g. val='2013020'; fmt='%Y%U%w'
508
540
weekday = 6
509
541
else :
542
+ # e.g. val='2009324'; fmt='%Y%W%w'
510
543
weekday -= 1
511
544
elif parse_code == 14 :
545
+ # e.g. val='2009164202000'; fmt='%Y%j%H%M%S'
512
546
julian = int (found_dict[" j" ])
513
547
elif parse_code == 15 or parse_code == 16 :
514
548
week_of_year = int (found_dict[group_key])
515
549
if group_key == " U" :
550
+ # e.g. val='2013020'; fmt='%Y%U%w'
516
551
# U starts week on Sunday.
517
552
week_of_year_start = 6
518
553
else :
554
+ # e.g. val='2009324'; fmt='%Y%W%w'
519
555
# W starts week on Monday.
520
556
week_of_year_start = 0
521
557
elif parse_code == 17 :
558
+ # e.g. val='2011-12-30T00:00:00.000000UTC'; fmt='%Y-%m-%dT%H:%M:%S.%f%Z'
522
559
tz = pytz.timezone(found_dict[" Z" ])
523
560
elif parse_code == 19 :
561
+ # e.g. val='March 1, 2018 12:00:00+0400'; fmt='%B %d, %Y %H:%M:%S%z'
524
562
tz = parse_timezone_directive(found_dict[" z" ])
525
563
elif parse_code == 20 :
564
+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
526
565
iso_year = int (found_dict[" G" ])
527
566
elif parse_code == 21 :
567
+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
528
568
iso_week = int (found_dict[" V" ])
529
569
elif parse_code == 22 :
570
+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
530
571
weekday = int (found_dict[" u" ])
531
572
weekday -= 1
532
573
533
574
# If we know the wk of the year and what day of that wk, we can figure
534
575
# out the Julian day of the year.
535
576
if julian == - 1 and weekday != - 1 :
536
577
if week_of_year != - 1 :
578
+ # e.g. val='2013020'; fmt='%Y%U%w'
537
579
week_starts_Mon = week_of_year_start == 0
538
580
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
539
581
week_starts_Mon)
540
582
elif iso_year != - 1 and iso_week != - 1 :
583
+ # e.g. val='2015-1-7'; fmt='%G-%V-%u'
541
584
year, julian = _calc_julian_from_V(iso_year, iso_week,
542
585
weekday + 1 )
586
+ # else:
587
+ # # e.g. val='Thu Sep 25 2003'; fmt='%a %b %d %Y'
588
+ # pass
589
+
543
590
# Cannot pre-calculate date() since can change in Julian
544
591
# calculation and thus could have different value for the day of the wk
545
592
# calculation.
546
593
if julian == - 1 :
547
594
# Need to add 1 to result since first day of the year is 1, not
548
595
# 0.
596
+ # We don't actually need ordinal/julian here, but need to raise
597
+ # on e.g. val='2015-04-31'; fmt='%Y-%m-%d'
549
598
ordinal = date(year, month, day).toordinal()
550
599
julian = ordinal - date(year, 1 , 1 ).toordinal() + 1
551
600
else :
@@ -557,6 +606,9 @@ cdef tzinfo _parse_with_format(
557
606
month = datetime_result.month
558
607
day = datetime_result.day
559
608
if weekday == - 1 :
609
+ # We don't actually use weekday here, but need to do this in order to
610
+ # raise on y/m/d combinations
611
+ # TODO: not reached in tests 2023-10-28; necessary?
560
612
weekday = date(year, month, day).weekday()
561
613
562
614
dts.year = year
@@ -567,10 +619,6 @@ cdef tzinfo _parse_with_format(
567
619
dts.sec = second
568
620
dts.us = us
569
621
dts.ps = ns * 1000
570
-
571
- iresult[0 ] = npy_datetimestruct_to_datetime(NPY_FR_ns, & dts)
572
- check_dts_bounds(& dts)
573
-
574
622
return tz
575
623
576
624
0 commit comments