@@ -57,7 +57,7 @@ cdef dict _parse_code_table = {'y': 0,
57
57
' u' : 22 }
58
58
59
59
60
- def array_strptime (ndarray[object] values , object fmt , bint exact = True , errors = ' raise' ):
60
+ def array_strptime (ndarray[object] values , str fmt , bint exact = True , errors = ' raise' ):
61
61
"""
62
62
Calculates the datetime structs represented by the passed array of strings
63
63
@@ -349,7 +349,7 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='
349
349
350
350
351
351
"""
352
- _getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
352
+ TimeRE, _calc_julian_from_U_or_W are vendored
353
353
from the standard library, see
354
354
https://github.com/python/cpython/blob/master/Lib/_strptime.py
355
355
The original module-level docstring follows.
@@ -364,161 +364,14 @@ FUNCTIONS:
364
364
strptime -- Calculates the time struct represented by the passed-in string
365
365
"""
366
366
367
-
368
- def _getlang ():
369
- """ Figure out what language is being used for the locale"""
370
- return locale.getlocale(locale.LC_TIME)
371
-
372
-
373
- class LocaleTime :
374
- """
375
- Stores and handles locale-specific information related to time.
376
-
377
- ATTRIBUTES:
378
- f_weekday -- full weekday names (7-item list)
379
- a_weekday -- abbreviated weekday names (7-item list)
380
- f_month -- full month names (13-item list; dummy value in [0], which
381
- is added by code)
382
- a_month -- abbreviated month names (13-item list, dummy value in
383
- [0], which is added by code)
384
- am_pm -- AM/PM representation (2-item list)
385
- LC_date_time -- format string for date/time representation (string)
386
- LC_date -- format string for date representation (string)
387
- LC_time -- format string for time representation (string)
388
- timezone -- daylight- and non-daylight-savings timezone representation
389
- (2-item list of sets)
390
- lang -- Language used by instance (2-item tuple)
391
- """
392
-
393
- def __init__ (self ):
394
- """
395
- Set all attributes.
396
-
397
- Order of methods called matters for dependency reasons.
398
-
399
- The locale language is set at the offset and then checked again before
400
- exiting. This is to make sure that the attributes were not set with a
401
- mix of information from more than one locale. This would most likely
402
- happen when using threads where one thread calls a locale-dependent
403
- function while another thread changes the locale while the function in
404
- the other thread is still running. Proper coding would call for
405
- locks to prevent changing the locale while locale-dependent code is
406
- running. The check here is done in case someone does not think about
407
- doing this.
408
-
409
- Only other possible issue is if someone changed the timezone and did
410
- not call tz.tzset . That is an issue for the programmer, though,
411
- since changing the timezone is worthless without that call.
412
- """
413
- self .lang = _getlang()
414
- self .__calc_weekday()
415
- self .__calc_month()
416
- self .__calc_am_pm()
417
- self .__calc_timezone()
418
- self .__calc_date_time()
419
- if _getlang() != self .lang:
420
- raise ValueError (" locale changed during initialization" )
421
-
422
- def __pad (self , seq , front ):
423
- # Add '' to seq to either the front (is True), else the back.
424
- seq = list (seq)
425
- if front:
426
- seq.insert(0 , ' ' )
427
- else :
428
- seq.append(' ' )
429
- return seq
430
-
431
- def __calc_weekday (self ):
432
- # Set self.a_weekday and self.f_weekday using the calendar
433
- # module.
434
- a_weekday = [calendar.day_abbr[i].lower() for i in range (7 )]
435
- f_weekday = [calendar.day_name[i].lower() for i in range (7 )]
436
- self .a_weekday = a_weekday
437
- self .f_weekday = f_weekday
438
-
439
- def __calc_month (self ):
440
- # Set self.f_month and self.a_month using the calendar module.
441
- a_month = [calendar.month_abbr[i].lower() for i in range (13 )]
442
- f_month = [calendar.month_name[i].lower() for i in range (13 )]
443
- self .a_month = a_month
444
- self .f_month = f_month
445
-
446
- def __calc_am_pm (self ):
447
- # Set self.am_pm by using time.strftime().
448
-
449
- # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
450
- # magical; just happened to have used it everywhere else where a
451
- # static date was needed.
452
- am_pm = []
453
- for hour in (01 , 22 ):
454
- time_tuple = time.struct_time(
455
- (1999 , 3 , 17 , hour, 44 , 55 , 2 , 76 , 0 ))
456
- am_pm.append(time.strftime(" %p " , time_tuple).lower())
457
- self .am_pm = am_pm
458
-
459
- def __calc_date_time (self ):
460
- # Set self.date_time, self.date, & self.time by using
461
- # time.strftime().
462
-
463
- # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
464
- # overloaded numbers is minimized. The order in which searches for
465
- # values within the format string is very important; it eliminates
466
- # possible ambiguity for what something represents.
467
- time_tuple = time.struct_time((1999 , 3 , 17 , 22 , 44 , 55 , 2 , 76 , 0 ))
468
- date_time = [None , None , None ]
469
- date_time[0 ] = time.strftime(" %c " , time_tuple).lower()
470
- date_time[1 ] = time.strftime(" %x " , time_tuple).lower()
471
- date_time[2 ] = time.strftime(" %X " , time_tuple).lower()
472
- replacement_pairs = [(' %' , ' %% ' ), (self .f_weekday[2 ], ' %A ' ),
473
- (self .f_month[3 ], ' %B ' ),
474
- (self .a_weekday[2 ], ' %a ' ),
475
- (self .a_month[3 ], ' %b ' ), (self .am_pm[1 ], ' %p ' ),
476
- (' 1999' , ' %Y ' ), (' 99' , ' %y ' ), (' 22' , ' %H ' ),
477
- (' 44' , ' %M ' ), (' 55' , ' %S ' ), (' 76' , ' %j ' ),
478
- (' 17' , ' %d ' ), (' 03' , ' %m ' ), (' 3' , ' %m ' ),
479
- # '3' needed for when no leading zero.
480
- (' 2' , ' %w ' ), (' 10' , ' %I ' )]
481
- replacement_pairs.extend([(tz, " %Z " ) for tz_values in self .timezone
482
- for tz in tz_values])
483
- for offset, directive in ((0 , ' %c ' ), (1 , ' %x ' ), (2 , ' %X ' )):
484
- current_format = date_time[offset]
485
- for old, new in replacement_pairs:
486
- # Must deal with possible lack of locale info
487
- # manifesting itself as the empty string (e.g., Swedish's
488
- # lack of AM/PM info) or a platform returning a tuple of empty
489
- # strings (e.g., MacOS 9 having timezone as ('','')).
490
- if old:
491
- current_format = current_format.replace(old, new)
492
- # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
493
- # 2005-01-03 occurs before the first Monday of the year. Otherwise
494
- # %U is used.
495
- time_tuple = time.struct_time((1999 , 1 , 3 , 1 , 1 , 1 , 6 , 3 , 0 ))
496
- if ' 00' in time.strftime(directive, time_tuple):
497
- U_W = ' %W '
498
- else :
499
- U_W = ' %U '
500
- date_time[offset] = current_format.replace(' 11' , U_W)
501
- self .LC_date_time = date_time[0 ]
502
- self .LC_date = date_time[1 ]
503
- self .LC_time = date_time[2 ]
504
-
505
- def __calc_timezone (self ):
506
- # Set self.timezone by using time.tzname.
507
- # Do not worry about possibility of time.tzname[0] == timetzname[1]
508
- # and time.daylight; handle that in strptime .
509
- try :
510
- time.tzset()
511
- except AttributeError :
512
- pass
513
- no_saving = frozenset ([" utc" , " gmt" , time.tzname[0 ].lower()])
514
- if time.daylight:
515
- has_saving = frozenset ([time.tzname[1 ].lower()])
516
- else :
517
- has_saving = frozenset ()
518
- self .timezone = (no_saving, has_saving)
367
+ from _strptime import (
368
+ LocaleTime,
369
+ TimeRE as _TimeRE,
370
+ _getlang,
371
+ )
519
372
520
373
521
- class TimeRE (dict ):
374
+ class TimeRE (_TimeRE ):
522
375
"""
523
376
Handle conversion from format directives to regexes.
524
377
@@ -532,100 +385,20 @@ class TimeRE(dict):
532
385
533
386
Order of execution is important for dependency reasons.
534
387
"""
535
- if locale_time:
536
- self .locale_time = locale_time
537
- else :
538
- self .locale_time = LocaleTime()
539
388
self ._Z = None
540
- base = super ()
541
- base.__init__ ({
542
- # The " \d" part of the regex is to make %c from ANSI C work
543
- ' d' : r " ( ?P<d> 3[0-1 ]| [1-2 ]\d | 0[1-9 ]| [1-9 ]| [1-9 ]) " ,
544
- ' f' : r " ( ?P<f> [0-9 ]{1,9} ) " ,
545
- ' G' : r " ( ?P<G> \d\d\d\d ) " ,
546
- ' H' : r " ( ?P<H> 2[0-3 ]| [0-1 ]\d | \d ) " ,
547
- ' I' : r " ( ?P<I> 1[0-2 ]| 0[1-9 ]| [1-9 ]) " ,
548
- ' j' : (r " ( ?P<j> 36[0-6 ]| 3[0-5 ]\d | [1-2 ]\d\d | 0[1-9 ]\d | 00[1-9 ]| "
549
- r"[1-9 ]\d | 0[1-9 ]| [1-9 ]) " ),
550
- ' m' : r " ( ?P<m> 1[0-2 ]| 0[1-9 ]| [1-9 ]) " ,
551
- ' M' : r " ( ?P<M> [0-5 ]\d | \d ) " ,
552
- ' S' : r " ( ?P<S> 6[0-1 ]| [0-5 ]\d | \d ) " ,
553
- ' u' : r " ( ?P<u> [1-7 ]) " ,
554
- ' U' : r " ( ?P<U> 5[0-3 ]| [0-4 ]\d | \d ) " ,
555
- ' V' : r " ( ?P<V> 5[0-3 ]| 0[1-9 ]| [1-4 ]\d | \d ) " ,
556
- ' w' : r " ( ?P<w> [0-6 ]) " ,
557
- # W is set below by using 'U'
558
- ' y' : r " ( ?P<y> \d\d ) " ,
559
- # TODO: Does 'Y' need to worry about having less or more than
560
- # 4 digits?
561
- ' Y' : r " ( ?P<Y> \d\d\d\d ) " ,
562
- ' z' : r " ( ?P<z> [+- ]\d\d :? [0-5 ]\d ( :? [0-5 ]\d ( \. \d {1,6} ) ? ) ? | Z) " ,
563
- ' A' : self .__seqToRE(self .locale_time.f_weekday, ' A' ),
564
- ' a' : self .__seqToRE(self .locale_time.a_weekday, ' a' ),
565
- ' B' : self .__seqToRE(self .locale_time.f_month[1 :], ' B' ),
566
- ' b' : self .__seqToRE(self .locale_time.a_month[1 :], ' b' ),
567
- ' p' : self .__seqToRE(self .locale_time.am_pm, ' p' ),
568
- # 'Z' key is generated lazily via __getitem__
569
- ' %' : ' %' })
570
- base.__setitem__ (' W' , base.__getitem__ (' U' ).replace(' U' , ' W' ))
571
- base.__setitem__ (' c' , self .pattern(self .locale_time.LC_date_time))
572
- base.__setitem__ (' x' , self .pattern(self .locale_time.LC_date))
573
- base.__setitem__ (' X' , self .pattern(self .locale_time.LC_time))
389
+ super ().__init__(locale_time = locale_time)
574
390
575
391
def __getitem__ (self , key ):
576
392
if key == " Z" :
577
393
# lazy computation
578
394
if self ._Z is None :
579
395
self ._Z = self .__seqToRE(pytz.all_timezones, ' Z' )
396
+ # Note: handling Z is the key difference vs using the stdlib
397
+ # _strptime.TimeRE. test_to_datetime_parse_tzname_or_tzoffset with
398
+ # fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version.
580
399
return self ._Z
581
400
return super ().__getitem__(key)
582
401
583
- def __seqToRE (self , to_convert , directive ):
584
- """
585
- Convert a list to a regex string for matching a directive.
586
-
587
- Want possible matching values to be from longest to shortest. This
588
- prevents the possibility of a match occurring for a value that also
589
- a substring of a larger value that should have matched (e.g., 'abc'
590
- matching when 'abcdef' should have been the match).
591
- """
592
- to_convert = sorted (to_convert, key = len , reverse = True )
593
- for value in to_convert:
594
- if value != ' ' :
595
- break
596
- else :
597
- return ' '
598
- regex = ' |' .join(re.escape(stuff) for stuff in to_convert)
599
- regex = f" (?P<{directive}>{regex})"
600
- return regex
601
-
602
- def pattern (self , format ):
603
- """
604
- Return regex pattern for the format string.
605
-
606
- Need to make sure that any characters that might be interpreted as
607
- regex syntax are escaped.
608
- """
609
- processed_format = ' '
610
- # The sub() call escapes all characters that might be misconstrued
611
- # as regex syntax. Cannot use re.escape since we have to deal with
612
- # format directives (%m, etc.).
613
- regex_chars = re.compile(r " ( [\\ .^$*+? \(\) {} \[\] | ]) " )
614
- format = regex_chars.sub(r " \\ \1 " , format)
615
- whitespace_replacement = re.compile(r ' \s + ' )
616
- format = whitespace_replacement.sub(r ' \\ s+ ' , format)
617
- while ' %' in format:
618
- directive_index = format.index(' %' ) + 1
619
- processed_format = (f" {processed_format}"
620
- f" {format[:directive_index -1]}"
621
- f" {self[format[directive_index]]}" )
622
- format = format[directive_index + 1 :]
623
- return f" {processed_format}{format}"
624
-
625
- def compile (self , format ):
626
- """ Return a compiled re object for the format string."""
627
- return re.compile(self .pattern(format), re.IGNORECASE)
628
-
629
402
630
403
_cache_lock = _thread_allocate_lock()
631
404
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
0 commit comments