From f697065c4206db4569d829ddb4f2b2fe869adce1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 21 Jun 2018 14:45:24 -0700 Subject: [PATCH 1/3] cache DateOffset attrs now that they are immutable --- pandas/tseries/offsets.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index da8fdb4d79e34..a3f82c1a0902e 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -288,6 +288,7 @@ def isAnchored(self): # if there were a canonical docstring for what isAnchored means. return (self.n == 1) + @cache_readonly def _params(self): all_paras = self.__dict__.copy() if 'holidays' in all_paras and not all_paras['holidays']: @@ -322,8 +323,6 @@ def name(self): return self.rule_code def __eq__(self, other): - if other is None: - return False if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset @@ -333,13 +332,13 @@ def __eq__(self, other): if not isinstance(other, DateOffset): return False - return self._params() == other._params() + return self._params == other._params def __ne__(self, other): return not self == other def __hash__(self): - return hash(self._params()) + return hash(self._params) def __add__(self, other): if isinstance(other, (ABCDatetimeIndex, ABCSeries)): @@ -397,7 +396,7 @@ def _prefix(self): def rule_code(self): return self._prefix - @property + @cache_readonly def freqstr(self): try: code = self.rule_code @@ -601,7 +600,7 @@ def next_bday(self): else: return BusinessDay(n=nb_offset) - # TODO: Cache this once offsets are immutable + @cache_readonly def _get_daytime_flag(self): if self.start == self.end: raise ValueError('start and end must not be the same') @@ -643,12 +642,12 @@ def _prev_opening_time(self, other): return datetime(other.year, other.month, other.day, self.start.hour, self.start.minute) - # TODO: cache this once offsets are immutable + @cache_readonly def _get_business_hours_by_sec(self): """ Return business hours in a day by seconds. """ - if self._get_daytime_flag(): + if self._get_daytime_flag: # create dummy datetime to calculate businesshours in a day dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 1, self.end.hour, self.end.minute) @@ -662,7 +661,7 @@ def _get_business_hours_by_sec(self): def rollback(self, dt): """Roll provided date backward to next offset only if not on offset""" if not self.onOffset(dt): - businesshours = self._get_business_hours_by_sec() + businesshours = self._get_business_hours_by_sec if self.n >= 0: dt = self._prev_opening_time( dt) + timedelta(seconds=businesshours) @@ -683,9 +682,8 @@ def rollforward(self, dt): @apply_wraps def apply(self, other): - # calculate here because offset is not immutable - daytime = self._get_daytime_flag() - businesshours = self._get_business_hours_by_sec() + daytime = self._get_daytime_flag + businesshours = self._get_business_hours_by_sec bhdelta = timedelta(seconds=businesshours) if isinstance(other, datetime): @@ -766,7 +764,7 @@ def onOffset(self, dt): dt.minute, dt.second, dt.microsecond) # Valid BH can be on the different BusinessDay during midnight # Distinguish by the time spent from previous opening time - businesshours = self._get_business_hours_by_sec() + businesshours = self._get_business_hours_by_sec return self._onOffset(dt, businesshours) def _onOffset(self, dt, businesshours): @@ -2203,13 +2201,12 @@ def __eq__(self, other): if isinstance(other, Tick): return self.delta == other.delta else: - # TODO: Are there cases where this should raise TypeError? return False # This is identical to DateOffset.__hash__, but has to be redefined here # for Python 3, because we've redefined __eq__. def __hash__(self): - return hash(self._params()) + return hash(self._params) def __ne__(self, other): if isinstance(other, compat.string_types): @@ -2220,7 +2217,6 @@ def __ne__(self, other): if isinstance(other, Tick): return self.delta != other.delta else: - # TODO: Are there cases where this should raise TypeError? return True @property From 3aba6088bdf7ce5e072ce13919a6c3622b241196 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 21 Jun 2018 18:04:55 -0700 Subject: [PATCH 2/3] fix pickle errors --- pandas/_libs/tslibs/offsets.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index a9ef9166e4d33..63add06db17b4 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -404,6 +404,9 @@ class _BaseOffset(object): kwds = {key: odict[key] for key in odict if odict[key]} state.update(kwds) + if '_cache' not in state: + state['_cache'] = {} + self.__dict__.update(state) if 'weekmask' in state and 'holidays' in state: From 4d8411db73ea3cac79037ee82f9e67c1dc5bc425 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 22 Jun 2018 11:10:39 -0700 Subject: [PATCH 3/3] asv, whatsnew note --- asv_bench/benchmarks/period.py | 5 +++++ doc/source/whatsnew/v0.24.0.txt | 1 + 2 files changed, 6 insertions(+) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 897a3338c164c..c34f9a737473e 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -64,6 +64,11 @@ def setup(self): def time_setitem_period_column(self): self.df['col'] = self.rng + def time_set_index(self): + # GH#21582 limited by comparisons of Period objects + self.df['col2'] = self.rng + self.df.set_index('col2', append=True) + class Algorithms(object): diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 4bfae7de01b8f..5f05bbdfdb948 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -130,6 +130,7 @@ Performance Improvements - Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`) - Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) +- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`) - .. _whatsnew_0240.docs: