Skip to content

cache DateOffset attrs now that they are immutable #21582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions asv_bench/benchmarks/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ def setup(self):
def time_setitem_period_column(self):
self.df['col'] = self.rng

def time_set_index(self):
# GH#21582 limited by comparisons of Period objects
self.df['col2'] = self.rng
self.df.set_index('col2', append=True)


class Algorithms(object):

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ Performance Improvements

- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`)
- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`)
-

.. _whatsnew_0240.docs:
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,9 @@ class _BaseOffset(object):
kwds = {key: odict[key] for key in odict if odict[key]}
state.update(kwds)

if '_cache' not in state:
state['_cache'] = {}

self.__dict__.update(state)

if 'weekmask' in state and 'holidays' in state:
Expand Down
28 changes: 12 additions & 16 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def isAnchored(self):
# if there were a canonical docstring for what isAnchored means.
return (self.n == 1)

@cache_readonly
def _params(self):
all_paras = self.__dict__.copy()
if 'holidays' in all_paras and not all_paras['holidays']:
Expand Down Expand Up @@ -322,8 +323,6 @@ def name(self):
return self.rule_code

def __eq__(self, other):
if other is None:
return False

if isinstance(other, compat.string_types):
from pandas.tseries.frequencies import to_offset
Expand All @@ -333,13 +332,13 @@ def __eq__(self, other):
if not isinstance(other, DateOffset):
return False

return self._params() == other._params()
return self._params == other._params

def __ne__(self, other):
return not self == other

def __hash__(self):
return hash(self._params())
return hash(self._params)

def __add__(self, other):
if isinstance(other, (ABCDatetimeIndex, ABCSeries)):
Expand Down Expand Up @@ -397,7 +396,7 @@ def _prefix(self):
def rule_code(self):
return self._prefix

@property
@cache_readonly
def freqstr(self):
try:
code = self.rule_code
Expand Down Expand Up @@ -601,7 +600,7 @@ def next_bday(self):
else:
return BusinessDay(n=nb_offset)

# TODO: Cache this once offsets are immutable
@cache_readonly
def _get_daytime_flag(self):
if self.start == self.end:
raise ValueError('start and end must not be the same')
Expand Down Expand Up @@ -643,12 +642,12 @@ def _prev_opening_time(self, other):
return datetime(other.year, other.month, other.day,
self.start.hour, self.start.minute)

# TODO: cache this once offsets are immutable
@cache_readonly
def _get_business_hours_by_sec(self):
"""
Return business hours in a day by seconds.
"""
if self._get_daytime_flag():
if self._get_daytime_flag:
# create dummy datetime to calculate businesshours in a day
dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute)
until = datetime(2014, 4, 1, self.end.hour, self.end.minute)
Expand All @@ -662,7 +661,7 @@ def _get_business_hours_by_sec(self):
def rollback(self, dt):
"""Roll provided date backward to next offset only if not on offset"""
if not self.onOffset(dt):
businesshours = self._get_business_hours_by_sec()
businesshours = self._get_business_hours_by_sec
if self.n >= 0:
dt = self._prev_opening_time(
dt) + timedelta(seconds=businesshours)
Expand All @@ -683,9 +682,8 @@ def rollforward(self, dt):

@apply_wraps
def apply(self, other):
# calculate here because offset is not immutable
daytime = self._get_daytime_flag()
businesshours = self._get_business_hours_by_sec()
daytime = self._get_daytime_flag
businesshours = self._get_business_hours_by_sec
bhdelta = timedelta(seconds=businesshours)

if isinstance(other, datetime):
Expand Down Expand Up @@ -766,7 +764,7 @@ def onOffset(self, dt):
dt.minute, dt.second, dt.microsecond)
# Valid BH can be on the different BusinessDay during midnight
# Distinguish by the time spent from previous opening time
businesshours = self._get_business_hours_by_sec()
businesshours = self._get_business_hours_by_sec
return self._onOffset(dt, businesshours)

def _onOffset(self, dt, businesshours):
Expand Down Expand Up @@ -2203,13 +2201,12 @@ def __eq__(self, other):
if isinstance(other, Tick):
return self.delta == other.delta
else:
# TODO: Are there cases where this should raise TypeError?
return False

# This is identical to DateOffset.__hash__, but has to be redefined here
# for Python 3, because we've redefined __eq__.
def __hash__(self):
return hash(self._params())
return hash(self._params)

def __ne__(self, other):
if isinstance(other, compat.string_types):
Expand All @@ -2220,7 +2217,6 @@ def __ne__(self, other):
if isinstance(other, Tick):
return self.delta != other.delta
else:
# TODO: Are there cases where this should raise TypeError?
return True

@property
Expand Down