Skip to content

Commit dc72bc8

Browse files
jbrockmendel authored and Marco Gorelli committed
PERF: lazify pytz seqToRE call, trims 35ms from import (pandas-dev#28228)
1 parent 0dd4833 commit dc72bc8

File tree

1 file changed

+29
-15
lines changed

1 file changed

+29
-15
lines changed

pandas/_libs/tslibs/strptime.pyx

+29-15
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,8 @@ def array_strptime(object[:] values, object fmt,
341341
return result, result_timezone.base
342342

343343

344-
"""_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
344+
"""
345+
_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
345346
from the standard library, see
346347
https://github.com/python/cpython/blob/master/Lib/_strptime.py
347348
The original module-level docstring follows.
@@ -363,7 +364,8 @@ def _getlang():
363364

364365

365366
class LocaleTime:
366-
"""Stores and handles locale-specific information related to time.
367+
"""
368+
Stores and handles locale-specific information related to time.
367369
368370
ATTRIBUTES:
369371
f_weekday -- full weekday names (7-item list)
@@ -382,7 +384,8 @@ class LocaleTime:
382384
"""
383385

384386
def __init__(self):
385-
"""Set all attributes.
387+
"""
388+
Set all attributes.
386389
387390
Order of methods called matters for dependency reasons.
388391
@@ -399,7 +402,6 @@ class LocaleTime:
399402
Only other possible issue is if someone changed the timezone and did
400403
not call tz.tzset . That is an issue for the programmer, though,
401404
since changing the timezone is worthless without that call.
402-
403405
"""
404406
self.lang = _getlang()
405407
self.__calc_weekday()
@@ -518,15 +520,16 @@ class TimeRE(dict):
518520
"""
519521

520522
def __init__(self, locale_time=None):
521-
"""Create keys/values.
523+
"""
524+
Create keys/values.
522525
523526
Order of execution is important for dependency reasons.
524-
525527
"""
526528
if locale_time:
527529
self.locale_time = locale_time
528530
else:
529531
self.locale_time = LocaleTime()
532+
self._Z = None
530533
base = super()
531534
base.__init__({
532535
# The " \d" part of the regex is to make %c from ANSI C work
@@ -555,21 +558,29 @@ class TimeRE(dict):
555558
'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
556559
'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
557560
'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
558-
'Z': self.__seqToRE(pytz.all_timezones, 'Z'),
561+
# 'Z' key is generated lazily via __getitem__
559562
'%': '%'})
560563
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
561564
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
562565
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
563566
base.__setitem__('X', self.pattern(self.locale_time.LC_time))
564567

568+
def __getitem__(self, key):
569+
if key == "Z":
570+
# lazy computation
571+
if self._Z is None:
572+
self._Z = self.__seqToRE(pytz.all_timezones, 'Z')
573+
return self._Z
574+
return super().__getitem__(key)
575+
565576
def __seqToRE(self, to_convert, directive):
566-
"""Convert a list to a regex string for matching a directive.
577+
"""
578+
Convert a list to a regex string for matching a directive.
567579
568580
Want possible matching values to be from longest to shortest. This
569581
prevents the possibility of a match occurring for a value that also
570582
a substring of a larger value that should have matched (e.g., 'abc'
571583
matching when 'abcdef' should have been the match).
572-
573584
"""
574585
to_convert = sorted(to_convert, key=len, reverse=True)
575586
for value in to_convert:
@@ -582,11 +593,11 @@ class TimeRE(dict):
582593
return '%s)' % regex
583594

584595
def pattern(self, format):
585-
"""Return regex pattern for the format string.
596+
"""
597+
Return regex pattern for the format string.
586598
587599
Need to make sure that any characters that might be interpreted as
588600
regex syntax are escaped.
589-
590601
"""
591602
processed_format = ''
592603
# The sub() call escapes all characters that might be misconstrued
@@ -619,7 +630,8 @@ _regex_cache = {}
619630

620631
cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
621632
int day_of_week, int week_starts_Mon):
622-
"""Calculate the Julian day based on the year, week of the year, and day of
633+
"""
634+
Calculate the Julian day based on the year, week of the year, and day of
623635
the week, with week_start_day representing whether the week of the year
624636
assumes the week starts on Sunday or Monday (6 or 0).
625637
@@ -660,8 +672,10 @@ cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
660672
return 1 + days_to_week + day_of_week
661673

662674

663-
cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
664-
"""Calculate the Julian day based on the ISO 8601 year, week, and weekday.
675+
cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
676+
"""
677+
Calculate the Julian day based on the ISO 8601 year, week, and weekday.
678+
665679
ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
666680
ISO week days range from 1 (Monday) to 7 (Sunday).
667681
@@ -694,7 +708,7 @@ cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
694708
return iso_year, ordinal
695709

696710

697-
cdef parse_timezone_directive(object z):
711+
cdef parse_timezone_directive(str z):
698712
"""
699713
Parse the '%z' directive and return a pytz.FixedOffset
700714

0 commit comments

Comments
 (0)