Skip to content

PERF: fix perf issue in tz conversions w/o affecting DST transitions #7652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 2, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ def _period_group(freqstr):
def _period_str_to_code(freqstr):
# hack
freqstr = _rule_aliases.get(freqstr, freqstr)

if freqstr not in _dont_uppercase:
freqstr = _rule_aliases.get(freqstr.lower(), freqstr)

Expand Down Expand Up @@ -659,6 +659,25 @@ def infer_freq(index, warn=True):
# Derived duration constants: one hour is 60 minutes and one day is 24 hours,
# expressed in whatever unit _ONE_MINUTE uses (defined outside this excerpt).
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR

def _tz_convert_with_transitions(values, to_tz, from_tz):
    """
    Convert i8 values between timezones, taking DST transitions into account.

    The element-wise ``tslib.tz_convert_single`` is treated as the reference
    conversion but is slow when applied over a whole array. The bulk
    ``tslib.tz_convert`` is much faster, so it is used whenever it agrees
    with the element-wise result on the first and last values.

    Parameters
    ----------
    values : ndarray of i8 values
    to_tz : timezone, forwarded as the second argument of the tslib converters
    from_tz : timezone, forwarded as the third argument of the tslib converters

    Returns
    -------
    ndarray of converted i8 values

    Notes
    -----
    NOTE(review): the direction of the conversion is defined by the tslib
    functions; this helper only forwards ``to_tz`` / ``from_tz`` positionally.
    Confirm against ``tslib.tz_convert`` that the parameter names match.
    """

    def _convert_one(x):
        # element-wise conversion, used as the trusted (slow) reference
        return tslib.tz_convert_single(x, to_tz, from_tz)

    # vectorization is slow, so test whether the faster bulk tz_convert
    # gives the same answer at both ends of the data
    if len(values) > 2:
        first_slow = _convert_one(values[0])
        last_slow = _convert_one(values[-1])

        endpoints = np.array([values[0], values[-1]], dtype='i8')
        first_fast, last_fast = tslib.tz_convert(endpoints, to_tz, from_tz)

        # fast and slow paths agree at both endpoints: assume no DST
        # transition is crossed, so the bulk conversion is ok
        if first_fast == first_slow and last_fast == last_slow:
            return tslib.tz_convert(values, to_tz, from_tz)

    # Explicit otypes keeps the result int64 regardless of the first value
    # and lets np.vectorize accept empty input instead of raising ValueError.
    return np.vectorize(_convert_one, otypes=[np.int64])(values)

class _FrequencyInferer(object):
"""
Expand All @@ -670,10 +689,7 @@ def __init__(self, index, warn=True):
self.values = np.asarray(index).view('i8')

if index.tz is not None:
f = lambda x: tslib.tz_convert_single(x, 'UTC', index.tz)
self.values = np.vectorize(f)(self.values)
# This cant work, because of DST
# self.values = tslib.tz_convert(self.values, 'UTC', index.tz)
self.values = _tz_convert_with_transitions(self.values,'UTC',index.tz)

self.warn = warn

Expand Down
11 changes: 6 additions & 5 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pandas.compat import u
from pandas.tseries.frequencies import (
infer_freq, to_offset, get_period_alias,
Resolution, get_reso_string)
Resolution, get_reso_string, _tz_convert_with_transitions)
from pandas.core.base import DatetimeIndexOpsMixin
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
from pandas.tseries.tools import parse_time_string, normalize_date
Expand Down Expand Up @@ -1376,7 +1376,10 @@ def __getitem__(self, key):
else:
if com._is_bool_indexer(key):
key = np.asarray(key)
key = lib.maybe_booleans_to_slice(key.view(np.uint8))
if key.all():
key = slice(0,None,None)
else:
key = lib.maybe_booleans_to_slice(key.view(np.uint8))

new_offset = None
if isinstance(key, slice):
Expand Down Expand Up @@ -1588,9 +1591,7 @@ def insert(self, loc, item):
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
self[loc:].asi8))
if self.tz is not None:
f = lambda x: tslib.tz_convert_single(x, 'UTC', self.tz)
new_dates = np.vectorize(f)(new_dates)
# new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
new_dates = _tz_convert_with_transitions(new_dates,'UTC',self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)

except (AttributeError, TypeError):
Expand Down