Skip to content

Commit 607e414

Browse files
jbrockmendel authored and victor committed
cache DateOffset attrs now that they are immutable (pandas-dev#21582)
1 parent 89caed9 commit 607e414

File tree

4 files changed

+21
-16
lines changed

4 files changed

+21
-16
lines changed

asv_bench/benchmarks/period.py

+5
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ def setup(self):
6464
def time_setitem_period_column(self):
6565
self.df['col'] = self.rng
6666

67+
def time_set_index(self):
68+
# GH#21582 limited by comparisons of Period objects
69+
self.df['col2'] = self.rng
70+
self.df.set_index('col2', append=True)
71+
6772

6873
class Algorithms(object):
6974

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ Performance Improvements
130130

131131
- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
132132
- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`)
133+
- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`)
133134
-
134135

135136
.. _whatsnew_0240.docs:

pandas/_libs/tslibs/offsets.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,9 @@ class _BaseOffset(object):
404404
kwds = {key: odict[key] for key in odict if odict[key]}
405405
state.update(kwds)
406406

407+
if '_cache' not in state:
408+
state['_cache'] = {}
409+
407410
self.__dict__.update(state)
408411

409412
if 'weekmask' in state and 'holidays' in state:

pandas/tseries/offsets.py

+12-16
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ def isAnchored(self):
288288
# if there were a canonical docstring for what isAnchored means.
289289
return (self.n == 1)
290290

291+
@cache_readonly
291292
def _params(self):
292293
all_paras = self.__dict__.copy()
293294
if 'holidays' in all_paras and not all_paras['holidays']:
@@ -322,8 +323,6 @@ def name(self):
322323
return self.rule_code
323324

324325
def __eq__(self, other):
325-
if other is None:
326-
return False
327326

328327
if isinstance(other, compat.string_types):
329328
from pandas.tseries.frequencies import to_offset
@@ -333,13 +332,13 @@ def __eq__(self, other):
333332
if not isinstance(other, DateOffset):
334333
return False
335334

336-
return self._params() == other._params()
335+
return self._params == other._params
337336

338337
def __ne__(self, other):
339338
return not self == other
340339

341340
def __hash__(self):
342-
return hash(self._params())
341+
return hash(self._params)
343342

344343
def __add__(self, other):
345344
if isinstance(other, (ABCDatetimeIndex, ABCSeries)):
@@ -397,7 +396,7 @@ def _prefix(self):
397396
def rule_code(self):
398397
return self._prefix
399398

400-
@property
399+
@cache_readonly
401400
def freqstr(self):
402401
try:
403402
code = self.rule_code
@@ -601,7 +600,7 @@ def next_bday(self):
601600
else:
602601
return BusinessDay(n=nb_offset)
603602

604-
# TODO: Cache this once offsets are immutable
603+
@cache_readonly
605604
def _get_daytime_flag(self):
606605
if self.start == self.end:
607606
raise ValueError('start and end must not be the same')
@@ -643,12 +642,12 @@ def _prev_opening_time(self, other):
643642
return datetime(other.year, other.month, other.day,
644643
self.start.hour, self.start.minute)
645644

646-
# TODO: cache this once offsets are immutable
645+
@cache_readonly
647646
def _get_business_hours_by_sec(self):
648647
"""
649648
Return business hours in a day by seconds.
650649
"""
651-
if self._get_daytime_flag():
650+
if self._get_daytime_flag:
652651
# create dummy datetime to calculate businesshours in a day
653652
dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute)
654653
until = datetime(2014, 4, 1, self.end.hour, self.end.minute)
@@ -662,7 +661,7 @@ def _get_business_hours_by_sec(self):
662661
def rollback(self, dt):
663662
"""Roll provided date backward to next offset only if not on offset"""
664663
if not self.onOffset(dt):
665-
businesshours = self._get_business_hours_by_sec()
664+
businesshours = self._get_business_hours_by_sec
666665
if self.n >= 0:
667666
dt = self._prev_opening_time(
668667
dt) + timedelta(seconds=businesshours)
@@ -683,9 +682,8 @@ def rollforward(self, dt):
683682

684683
@apply_wraps
685684
def apply(self, other):
686-
# calculate here because offset is not immutable
687-
daytime = self._get_daytime_flag()
688-
businesshours = self._get_business_hours_by_sec()
685+
daytime = self._get_daytime_flag
686+
businesshours = self._get_business_hours_by_sec
689687
bhdelta = timedelta(seconds=businesshours)
690688

691689
if isinstance(other, datetime):
@@ -766,7 +764,7 @@ def onOffset(self, dt):
766764
dt.minute, dt.second, dt.microsecond)
767765
# Valid BH can be on the different BusinessDay during midnight
768766
# Distinguish by the time spent from previous opening time
769-
businesshours = self._get_business_hours_by_sec()
767+
businesshours = self._get_business_hours_by_sec
770768
return self._onOffset(dt, businesshours)
771769

772770
def _onOffset(self, dt, businesshours):
@@ -2203,13 +2201,12 @@ def __eq__(self, other):
22032201
if isinstance(other, Tick):
22042202
return self.delta == other.delta
22052203
else:
2206-
# TODO: Are there cases where this should raise TypeError?
22072204
return False
22082205

22092206
# This is identical to DateOffset.__hash__, but has to be redefined here
22102207
# for Python 3, because we've redefined __eq__.
22112208
def __hash__(self):
2212-
return hash(self._params())
2209+
return hash(self._params)
22132210

22142211
def __ne__(self, other):
22152212
if isinstance(other, compat.string_types):
@@ -2220,7 +2217,6 @@ def __ne__(self, other):
22202217
if isinstance(other, Tick):
22212218
return self.delta != other.delta
22222219
else:
2223-
# TODO: Are there cases where this should raise TypeError?
22242220
return True
22252221

22262222
@property

0 commit comments

Comments
 (0)