Skip to content

Commit 8cd3dd6

Browse files
committed
Merge pull request pandas-dev#7652 from jreback/dst_transitions
PERF: fix perf issue in tz conversions w/o affecting DST transitions
2 parents 31cac55 + fc88541 commit 8cd3dd6

File tree

2 files changed

+27
-10
lines changed

2 files changed

+27
-10
lines changed

pandas/tseries/frequencies.py

+21-5
Original file line number | Diff line number | Diff line change
@@ -599,7 +599,7 @@ def _period_group(freqstr):
599599
def _period_str_to_code(freqstr):
600600
# hack
601601
freqstr = _rule_aliases.get(freqstr, freqstr)
602-
602+
603603
if freqstr not in _dont_uppercase:
604604
freqstr = _rule_aliases.get(freqstr.lower(), freqstr)
605605

@@ -659,6 +659,25 @@ def infer_freq(index, warn=True):
659659
_ONE_HOUR = 60 * _ONE_MINUTE
660660
_ONE_DAY = 24 * _ONE_HOUR
661661

662+
def _tz_convert_with_transitions(values, to_tz, from_tz):
663+
"""
664+
convert i8 values from the specificed timezone to the to_tz zone, taking
665+
into account DST transitions
666+
"""
667+
668+
# vectorization is slow, so tests if we can do this via the faster tz_convert
669+
f = lambda x: tslib.tz_convert_single(x, to_tz, from_tz)
670+
671+
if len(values) > 2:
672+
first_slow, last_slow = f(values[0]),f(values[-1])
673+
674+
first_fast, last_fast = tslib.tz_convert(np.array([values[0],values[-1]],dtype='i8'),to_tz,from_tz)
675+
676+
# don't cross a DST, so ok
677+
if first_fast == first_slow and last_fast == last_slow:
678+
return tslib.tz_convert(values,to_tz,from_tz)
679+
680+
return np.vectorize(f)(values)
662681

663682
class _FrequencyInferer(object):
664683
"""
@@ -670,10 +689,7 @@ def __init__(self, index, warn=True):
670689
self.values = np.asarray(index).view('i8')
671690

672691
if index.tz is not None:
673-
f = lambda x: tslib.tz_convert_single(x, 'UTC', index.tz)
674-
self.values = np.vectorize(f)(self.values)
675-
# This cant work, because of DST
676-
# self.values = tslib.tz_convert(self.values, 'UTC', index.tz)
692+
self.values = _tz_convert_with_transitions(self.values,'UTC',index.tz)
677693

678694
self.warn = warn
679695

pandas/tseries/index.py

+6-5
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
from pandas.compat import u
1515
from pandas.tseries.frequencies import (
1616
infer_freq, to_offset, get_period_alias,
17-
Resolution, get_reso_string)
17+
Resolution, get_reso_string, _tz_convert_with_transitions)
1818
from pandas.core.base import DatetimeIndexOpsMixin
1919
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
2020
from pandas.tseries.tools import parse_time_string, normalize_date
@@ -1376,7 +1376,10 @@ def __getitem__(self, key):
13761376
else:
13771377
if com._is_bool_indexer(key):
13781378
key = np.asarray(key)
1379-
key = lib.maybe_booleans_to_slice(key.view(np.uint8))
1379+
if key.all():
1380+
key = slice(0,None,None)
1381+
else:
1382+
key = lib.maybe_booleans_to_slice(key.view(np.uint8))
13801383

13811384
new_offset = None
13821385
if isinstance(key, slice):
@@ -1588,9 +1591,7 @@ def insert(self, loc, item):
15881591
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
15891592
self[loc:].asi8))
15901593
if self.tz is not None:
1591-
f = lambda x: tslib.tz_convert_single(x, 'UTC', self.tz)
1592-
new_dates = np.vectorize(f)(new_dates)
1593-
# new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
1594+
new_dates = _tz_convert_with_transitions(new_dates,'UTC',self.tz)
15941595
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
15951596

15961597
except (AttributeError, TypeError):

0 commit comments

Comments
 (0)