Skip to content

Commit 7fbc600

Browse files
rs2 authored and jreback committed
PERF: Fix performance issues when creating multiple instances of Period
closes pandas-dev#12903
closes pandas-dev#11831

Author: rs2 <[email protected]>

Closes pandas-dev#12909 from rs2/master and squashes the following commits:

0d9712d [rs2] Make RESO constants global in period.pyx and reduce the number of loops in asv_benchmarks/period.py
1c5a2ab [rs2] Added asv benchmark for Period, PeriodIndex
8bcfd57 [rs2] Reworded whatsnew
8f254e3 [rs2] Added a whatsnew entry + ensured constants are imported correctly by test_tslib.py
5b3e291 [rs2] Moved constants to frequencies.py
fec1b51 [rs2] Fix performance issues when creating multiple instances of Period (pandas-dev#12903, pandas-dev#11831)
1 parent db6d009 commit 7fbc600

File tree

5 files changed

+56
-53
lines changed

5 files changed

+56
-53
lines changed

asv_bench/benchmarks/period.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from pandas import PeriodIndex, date_range
2+
3+
4+
class create_period_index_from_date_range(object):
5+
goal_time = 0.2
6+
7+
def time_period_index(self):
8+
# Simulate irregular PeriodIndex
9+
PeriodIndex(date_range('1985', periods=1000).to_pydatetime(), freq='D')

doc/source/whatsnew/v0.18.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ Performance Improvements
409409

410410

411411
- Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`)
412-
412+
- Improved performance of ``Period`` construction and plotting of ``Period`` objects. (:issue:`12903`, :issue:`11831`)
413413

414414

415415

pandas/src/period.pyx

+23-39
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,20 @@ from tslib cimport (
3636
_nat_scalar_rules,
3737
)
3838

39+
from pandas.tseries import frequencies
40+
3941
from sys import version_info
4042

4143
cdef bint PY2 = version_info[0] == 2
4244

4345
cdef int64_t NPY_NAT = util.get_nat()
4446

47+
cdef int US_RESO = frequencies.US_RESO
48+
cdef int MS_RESO = frequencies.MS_RESO
49+
cdef int S_RESO = frequencies.S_RESO
50+
cdef int T_RESO = frequencies.T_RESO
51+
cdef int H_RESO = frequencies.H_RESO
52+
cdef int D_RESO = frequencies.D_RESO
4553

4654
cdef extern from "period_helper.h":
4755
ctypedef struct date_info:
@@ -476,12 +484,6 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None):
476484
reso = curr_reso
477485
return reso
478486

479-
US_RESO = 0
480-
MS_RESO = 1
481-
S_RESO = 2
482-
T_RESO = 3
483-
H_RESO = 4
484-
D_RESO = 5
485487

486488
cdef inline int _reso_stamp(pandas_datetimestruct *dts):
487489
if dts.us != 0:
@@ -662,17 +664,13 @@ cdef class Period(object):
662664
def _maybe_convert_freq(cls, object freq):
663665

664666
if isinstance(freq, compat.string_types):
665-
from pandas.tseries.frequencies import _period_alias_dict
666667
freq = freq.upper()
667-
freq = _period_alias_dict.get(freq, freq)
668+
freq = frequencies._period_alias_dict.get(freq, freq)
668669
elif isinstance(freq, (int, tuple)):
669-
from pandas.tseries.frequencies import get_freq_code as _gfc
670-
from pandas.tseries.frequencies import _get_freq_str
671-
code, stride = _gfc(freq)
672-
freq = _get_freq_str(code, stride)
670+
code, stride = frequencies.get_freq_code(freq)
671+
freq = frequencies._get_freq_str(code, stride)
673672

674-
from pandas.tseries.frequencies import to_offset
675-
freq = to_offset(freq)
673+
freq = frequencies.to_offset(freq)
676674

677675
if freq.n <= 0:
678676
raise ValueError('Frequency must be positive, because it'
@@ -691,9 +689,6 @@ cdef class Period(object):
691689
def __init__(self, value=None, freq=None, ordinal=None,
692690
year=None, month=1, quarter=None, day=1,
693691
hour=0, minute=0, second=0):
694-
from pandas.tseries import frequencies
695-
from pandas.tseries.frequencies import get_freq_code as _gfc
696-
697692
# freq points to a tuple (base, mult); base is one of the defined
698693
# periods such as A, Q, etc. Every five minutes would be, e.g.,
699694
# ('T', 5) but may be passed in as a string like '5T'
@@ -717,7 +712,7 @@ cdef class Period(object):
717712

718713
elif isinstance(value, Period):
719714
other = value
720-
if freq is None or _gfc(freq) == _gfc(other.freq):
715+
if freq is None or frequencies.get_freq_code(freq) == frequencies.get_freq_code(other.freq):
721716
ordinal = other.ordinal
722717
freq = other.freq
723718
else:
@@ -758,7 +753,7 @@ cdef class Period(object):
758753
msg = "Value must be Period, string, integer, or datetime"
759754
raise ValueError(msg)
760755

761-
base, mult = _gfc(freq)
756+
base, mult = frequencies.get_freq_code(freq)
762757

763758
if ordinal is None:
764759
self.ordinal = get_period_ordinal(dt.year, dt.month, dt.day,
@@ -771,7 +766,6 @@ cdef class Period(object):
771766

772767
def __richcmp__(self, other, op):
773768
if isinstance(other, Period):
774-
from pandas.tseries.frequencies import get_freq_code as _gfc
775769
if other.freq != self.freq:
776770
msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr)
777771
raise IncompatibleFrequency(msg)
@@ -790,7 +784,6 @@ cdef class Period(object):
790784
return hash((self.ordinal, self.freq))
791785

792786
def _add_delta(self, other):
793-
from pandas.tseries import frequencies
794787
if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)):
795788
offset = frequencies.to_offset(self.freq.rule_code)
796789
if isinstance(offset, offsets.Tick):
@@ -868,10 +861,9 @@ cdef class Period(object):
868861
-------
869862
resampled : Period
870863
"""
871-
from pandas.tseries.frequencies import get_freq_code as _gfc
872864
how = _validate_end_alias(how)
873-
base1, mult1 = _gfc(self.freq)
874-
base2, mult2 = _gfc(freq)
865+
base1, mult1 = frequencies.get_freq_code(self.freq)
866+
base2, mult2 = frequencies.get_freq_code(freq)
875867

876868
if self.ordinal == tslib.iNaT:
877869
ordinal = self.ordinal
@@ -918,23 +910,20 @@ cdef class Period(object):
918910
-------
919911
Timestamp
920912
"""
921-
from pandas.tseries import frequencies
922-
from pandas.tseries.frequencies import get_freq_code as _gfc
923913
how = _validate_end_alias(how)
924914

925915
if freq is None:
926-
base, mult = _gfc(self.freq)
916+
base, mult = frequencies.get_freq_code(self.freq)
927917
freq = frequencies.get_to_timestamp_base(base)
928918

929-
base, mult = _gfc(freq)
919+
base, mult = frequencies.get_freq_code(freq)
930920
val = self.asfreq(freq, how)
931921

932922
dt64 = period_ordinal_to_dt64(val.ordinal, base)
933923
return Timestamp(dt64, tz=tz)
934924

935925
cdef _field(self, alias):
936-
from pandas.tseries.frequencies import get_freq_code as _gfc
937-
base, mult = _gfc(self.freq)
926+
base, mult = frequencies.get_freq_code(self.freq)
938927
return get_period_field(alias, self.ordinal, base)
939928

940929
property year:
@@ -996,8 +985,7 @@ cdef class Period(object):
996985
return self.freq.freqstr
997986

998987
def __repr__(self):
999-
from pandas.tseries.frequencies import get_freq_code as _gfc
1000-
base, mult = _gfc(self.freq)
988+
base, mult = frequencies.get_freq_code(self.freq)
1001989
formatted = period_format(self.ordinal, base)
1002990
return "Period('%s', '%s')" % (formatted, self.freqstr)
1003991

@@ -1008,8 +996,7 @@ cdef class Period(object):
1008996
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
1009997
py2/py3.
1010998
"""
1011-
from pandas.tseries.frequencies import get_freq_code as _gfc
1012-
base, mult = _gfc(self.freq)
999+
base, mult = frequencies.get_freq_code(self.freq)
10131000
formatted = period_format(self.ordinal, base)
10141001
value = ("%s" % formatted)
10151002
return value
@@ -1159,15 +1146,13 @@ cdef class Period(object):
11591146
>>> a.strftime('%b. %d, %Y was a %A')
11601147
'Jan. 01, 2001 was a Monday'
11611148
"""
1162-
from pandas.tseries.frequencies import get_freq_code as _gfc
1163-
base, mult = _gfc(self.freq)
1149+
base, mult = frequencies.get_freq_code(self.freq)
11641150
return period_format(self.ordinal, base, fmt)
11651151

11661152

11671153
def _ordinal_from_fields(year, month, quarter, day, hour, minute,
11681154
second, freq):
1169-
from pandas.tseries.frequencies import get_freq_code as _gfc
1170-
base, mult = _gfc(freq)
1155+
base, mult = frequencies.get_freq_code(freq)
11711156
if quarter is not None:
11721157
year, month = _quarter_to_myear(year, quarter, freq)
11731158

@@ -1179,7 +1164,6 @@ def _quarter_to_myear(year, quarter, freq):
11791164
if quarter <= 0 or quarter > 4:
11801165
raise ValueError('Quarter must be 1 <= q <= 4')
11811166

1182-
from pandas.tseries import frequencies
11831167
mnum = frequencies._month_numbers[frequencies._get_rule_month(freq)] + 1
11841168
month = (mnum + (quarter - 1) * 3) % 12 + 1
11851169
if month > mnum:

pandas/tseries/frequencies.py

+14-7
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pandas.core.common as com
1515
import pandas.lib as lib
1616
import pandas.tslib as tslib
17-
import pandas._period as period
1817
from pandas.tslib import Timedelta
1918
from pytz import AmbiguousTimeError
2019

@@ -34,16 +33,24 @@ class FreqGroup(object):
3433
FR_NS = 12000
3534

3635

36+
US_RESO = 0
37+
MS_RESO = 1
38+
S_RESO = 2
39+
T_RESO = 3
40+
H_RESO = 4
41+
D_RESO = 5
42+
43+
3744
class Resolution(object):
3845

3946
# defined at module level above (period.pyx keeps C-level copies of these)
4047
# note that these are different from freq codes
41-
RESO_US = period.US_RESO
42-
RESO_MS = period.MS_RESO
43-
RESO_SEC = period.S_RESO
44-
RESO_MIN = period.T_RESO
45-
RESO_HR = period.H_RESO
46-
RESO_DAY = period.D_RESO
48+
RESO_US = US_RESO
49+
RESO_MS = MS_RESO
50+
RESO_SEC = S_RESO
51+
RESO_MIN = T_RESO
52+
RESO_HR = H_RESO
53+
RESO_DAY = D_RESO
4754

4855
_reso_str_map = {
4956
RESO_US: 'microsecond',

pandas/tseries/tests/test_tslib.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
from pandas.tslib import get_timezone
1212
from pandas._period import period_asfreq, period_ordinal
1313
from pandas.tseries.index import date_range, DatetimeIndex
14-
from pandas.tseries.frequencies import get_freq
14+
from pandas.tseries.frequencies import (
15+
get_freq,
16+
US_RESO, MS_RESO, S_RESO, H_RESO, D_RESO, T_RESO
17+
)
1518
import pandas.tseries.tools as tools
1619
import pandas.tseries.offsets as offsets
1720
import pandas.util.testing as tm
@@ -1309,11 +1312,11 @@ def test_resolution(self):
13091312

13101313
for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T',
13111314
'S', 'L', 'U'],
1312-
[period.D_RESO, period.D_RESO,
1313-
period.D_RESO, period.D_RESO,
1314-
period.H_RESO, period.T_RESO,
1315-
period.S_RESO, period.MS_RESO,
1316-
period.US_RESO]):
1315+
[D_RESO, D_RESO,
1316+
D_RESO, D_RESO,
1317+
H_RESO, T_RESO,
1318+
S_RESO, MS_RESO,
1319+
US_RESO]):
13171320
for tz in [None, 'Asia/Tokyo', 'US/Eastern',
13181321
'dateutil/US/Eastern']:
13191322
idx = date_range(start='2013-04-01', periods=30, freq=freq,

0 commit comments

Comments
 (0)