Skip to content

PERF: lazify pytz seqToRE call, trims 35ms from import #28228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 30, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 29 additions & 15 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,8 @@ def array_strptime(object[:] values, object fmt,
return result, result_timezone.base


"""_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
"""
_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
from the standard library, see
https://github.com/python/cpython/blob/master/Lib/_strptime.py
The original module-level docstring follows.
Expand All @@ -363,7 +364,8 @@ def _getlang():


class LocaleTime:
"""Stores and handles locale-specific information related to time.
"""
Stores and handles locale-specific information related to time.

ATTRIBUTES:
f_weekday -- full weekday names (7-item list)
Expand All @@ -382,7 +384,8 @@ class LocaleTime:
"""

def __init__(self):
"""Set all attributes.
"""
Set all attributes.

Order of methods called matters for dependency reasons.

Expand All @@ -399,7 +402,6 @@ class LocaleTime:
Only other possible issue is if someone changed the timezone and did
not call time.tzset(). That is an issue for the programmer, though,
since changing the timezone is worthless without that call.

"""
self.lang = _getlang()
self.__calc_weekday()
Expand Down Expand Up @@ -518,15 +520,16 @@ class TimeRE(dict):
"""

def __init__(self, locale_time=None):
"""Create keys/values.
"""
Create keys/values.

Order of execution is important for dependency reasons.

"""
if locale_time:
self.locale_time = locale_time
else:
self.locale_time = LocaleTime()
self._Z = None
base = super()
base.__init__({
# The " \d" part of the regex is to make %c from ANSI C work
Expand Down Expand Up @@ -555,21 +558,29 @@ class TimeRE(dict):
'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
'Z': self.__seqToRE(pytz.all_timezones, 'Z'),
# 'Z' key is generated lazily via __getitem__
'%': '%'})
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
base.__setitem__('X', self.pattern(self.locale_time.LC_time))

def __getitem__(self, key):
if key == "Z":
# lazy computation
if self._Z is None:
self._Z = self.__seqToRE(pytz.all_timezones, 'Z')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slightly off-topic: should this be a module-level function?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean the `__seqToRE` method.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is based on a class vendored from the stdlib, so probably not

return self._Z
return super().__getitem__(key)

def __seqToRE(self, to_convert, directive):
"""Convert a list to a regex string for matching a directive.
"""
Convert a list to a regex string for matching a directive.

Want possible matching values to be from longest to shortest. This
prevents the possibility of a match occurring for a value that is also
a substring of a larger value that should have matched (e.g., 'abc'
matching when 'abcdef' should have been the match).

"""
to_convert = sorted(to_convert, key=len, reverse=True)
for value in to_convert:
Expand All @@ -582,11 +593,11 @@ class TimeRE(dict):
return '%s)' % regex

def pattern(self, format):
"""Return regex pattern for the format string.
"""
Return regex pattern for the format string.

Need to make sure that any characters that might be interpreted as
regex syntax are escaped.

"""
processed_format = ''
# The sub() call escapes all characters that might be misconstrued
Expand Down Expand Up @@ -619,7 +630,8 @@ _regex_cache = {}

cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
int day_of_week, int week_starts_Mon):
"""Calculate the Julian day based on the year, week of the year, and day of
"""
Calculate the Julian day based on the year, week of the year, and day of
the week, with week_start_day representing whether the week of the year
assumes the week starts on Sunday or Monday (6 or 0).

Expand Down Expand Up @@ -660,8 +672,10 @@ cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
return 1 + days_to_week + day_of_week


cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
"""Calculate the Julian day based on the ISO 8601 year, week, and weekday.
cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
"""
Calculate the Julian day based on the ISO 8601 year, week, and weekday.

ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
ISO week days range from 1 (Monday) to 7 (Sunday).

Expand Down Expand Up @@ -694,7 +708,7 @@ cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
return iso_year, ordinal


cdef parse_timezone_directive(object z):
cdef parse_timezone_directive(str z):
"""
Parse the '%z' directive and return a pytz.FixedOffset

Expand Down