From 65c0bb6533b690ccf52e0e0f6dc6906622792739 Mon Sep 17 00:00:00 2001 From: jreback Date: Wed, 2 Jul 2014 12:44:28 -0400 Subject: [PATCH 1/2] PERF: allow dst transition computations to be handled much faster if the end-points are ok (GH7633) --- pandas/tseries/frequencies.py | 26 +++++++++++++++++++++----- pandas/tseries/index.py | 6 ++---- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 09ff6578160f8..441a5e8a99c78 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -599,7 +599,7 @@ def _period_group(freqstr): def _period_str_to_code(freqstr): # hack freqstr = _rule_aliases.get(freqstr, freqstr) - + if freqstr not in _dont_uppercase: freqstr = _rule_aliases.get(freqstr.lower(), freqstr) @@ -659,6 +659,25 @@ def infer_freq(index, warn=True): _ONE_HOUR = 60 * _ONE_MINUTE _ONE_DAY = 24 * _ONE_HOUR +def _tz_convert_with_transitions(values, to_tz, from_tz): + """ + convert i8 values from the specified timezone to the to_tz zone, taking + into account DST transitions + """ + + # vectorization is slow, so test whether we can do this via the faster tz_convert + f = lambda x: tslib.tz_convert_single(x, to_tz, from_tz) + + if len(values) > 2: + first_slow, last_slow = f(values[0]),f(values[-1]) + + first_fast, last_fast = tslib.tz_convert(np.array([values[0],values[-1]],dtype='i8'),to_tz,from_tz) + + # don't cross a DST, so ok + if first_fast == first_slow and last_fast == last_slow: + return tslib.tz_convert(values,to_tz,from_tz) + + return np.vectorize(f)(values) class _FrequencyInferer(object): """ @@ -670,10 +689,7 @@ def __init__(self, index, warn=True): self.values = np.asarray(index).view('i8') if index.tz is not None: - f = lambda x: tslib.tz_convert_single(x, 'UTC', index.tz) - self.values = np.vectorize(f)(self.values) - # This cant work, because of DST - # self.values = tslib.tz_convert(self.values, 'UTC', index.tz) + self.values = 
_tz_convert_with_transitions(self.values,'UTC',index.tz) self.warn = warn diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9473b10876600..a6b7dc01ca666 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -14,7 +14,7 @@ from pandas.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, - Resolution, get_reso_string) + Resolution, get_reso_string, _tz_convert_with_transitions) from pandas.core.base import DatetimeIndexOpsMixin from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date @@ -1588,9 +1588,7 @@ def insert(self, loc, item): new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) if self.tz is not None: - f = lambda x: tslib.tz_convert_single(x, 'UTC', self.tz) - new_dates = np.vectorize(f)(new_dates) - # new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz) + new_dates = _tz_convert_with_transitions(new_dates,'UTC',self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) except (AttributeError, TypeError): From fc88541654283fa5f9438306b027c325e5614d81 Mon Sep 17 00:00:00 2001 From: jreback Date: Wed, 2 Jul 2014 13:15:05 -0400 Subject: [PATCH 2/2] PERF: allow slice indexers to be computed faster --- pandas/tseries/index.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index a6b7dc01ca666..d022911fe2909 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1376,7 +1376,10 @@ def __getitem__(self, key): else: if com._is_bool_indexer(key): key = np.asarray(key) - key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + if key.all(): + key = slice(0,None,None) + else: + key = lib.maybe_booleans_to_slice(key.view(np.uint8)) new_offset = None if isinstance(key, slice):