Skip to content

Commit 65c0bb6

Browse files
committed
PERF: allow dst transition computations to be handled much faster
if the end-points are ok (GH7633)
1 parent 160419e commit 65c0bb6

File tree

2 files changed

+23
-9
lines changed

2 files changed

+23
-9
lines changed

pandas/tseries/frequencies.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,7 @@ def _period_group(freqstr):
599599
def _period_str_to_code(freqstr):
600600
# hack
601601
freqstr = _rule_aliases.get(freqstr, freqstr)
602-
602+
603603
if freqstr not in _dont_uppercase:
604604
freqstr = _rule_aliases.get(freqstr.lower(), freqstr)
605605

@@ -659,6 +659,25 @@ def infer_freq(index, warn=True):
659659
_ONE_HOUR = 60 * _ONE_MINUTE
660660
_ONE_DAY = 24 * _ONE_HOUR
661661

662+
def _tz_convert_with_transitions(values, to_tz, from_tz):
663+
"""
664+
convert i8 values from the specified timezone to the to_tz zone, taking
665+
into account DST transitions
666+
"""
667+
668+
# vectorization is slow, so test if we can do this via the faster tz_convert
669+
f = lambda x: tslib.tz_convert_single(x, to_tz, from_tz)
670+
671+
if len(values) > 2:
672+
first_slow, last_slow = f(values[0]),f(values[-1])
673+
674+
first_fast, last_fast = tslib.tz_convert(np.array([values[0],values[-1]],dtype='i8'),to_tz,from_tz)
675+
676+
# don't cross a DST, so ok
677+
if first_fast == first_slow and last_fast == last_slow:
678+
return tslib.tz_convert(values,to_tz,from_tz)
679+
680+
return np.vectorize(f)(values)
662681

663682
class _FrequencyInferer(object):
664683
"""
@@ -670,10 +689,7 @@ def __init__(self, index, warn=True):
670689
self.values = np.asarray(index).view('i8')
671690

672691
if index.tz is not None:
673-
f = lambda x: tslib.tz_convert_single(x, 'UTC', index.tz)
674-
self.values = np.vectorize(f)(self.values)
675-
# This cant work, because of DST
676-
# self.values = tslib.tz_convert(self.values, 'UTC', index.tz)
692+
self.values = _tz_convert_with_transitions(self.values,'UTC',index.tz)
677693

678694
self.warn = warn
679695

pandas/tseries/index.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pandas.compat import u
1515
from pandas.tseries.frequencies import (
1616
infer_freq, to_offset, get_period_alias,
17-
Resolution, get_reso_string)
17+
Resolution, get_reso_string, _tz_convert_with_transitions)
1818
from pandas.core.base import DatetimeIndexOpsMixin
1919
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
2020
from pandas.tseries.tools import parse_time_string, normalize_date
@@ -1588,9 +1588,7 @@ def insert(self, loc, item):
15881588
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
15891589
self[loc:].asi8))
15901590
if self.tz is not None:
1591-
f = lambda x: tslib.tz_convert_single(x, 'UTC', self.tz)
1592-
new_dates = np.vectorize(f)(new_dates)
1593-
# new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
1591+
new_dates = _tz_convert_with_transitions(new_dates,'UTC',self.tz)
15941592
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
15951593

15961594
except (AttributeError, TypeError):

0 commit comments

Comments
 (0)