-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Fix performance issues when creating multiple instances of Period (#12903, #11831) #12909
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
fec1b51
5b3e291
8f254e3
8bcfd57
1c5a2ab
0d9712d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from pandas import Period, PeriodIndex, date_range | ||
|
||
|
||
class create_period_index_from_date_range(object): | ||
goal_time = 0.2 | ||
|
||
def time_period_index(self): | ||
# Simulate irregular PeriodIndex | ||
PeriodIndex(date_range('1985', periods=10000).to_pydatetime(), freq='D') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just make this 1000 (the ratio will be the same, but the previous versions will take less time to run). |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,8 @@ from tslib cimport ( | |
_nat_scalar_rules, | ||
) | ||
|
||
from pandas.tseries import frequencies | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What you need to do is, at the top here, type and import all of the freqs that you need, e.g.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do in a moment. |
||
from sys import version_info | ||
|
||
cdef bint PY2 = version_info[0] == 2 | ||
|
@@ -461,7 +463,7 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None): | |
cdef: | ||
Py_ssize_t i, n = len(stamps) | ||
pandas_datetimestruct dts | ||
int reso = D_RESO, curr_reso | ||
int reso = frequencies.D_RESO, curr_reso | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then you can simply use |
||
|
||
if tz is not None: | ||
tz = maybe_get_tz(tz) | ||
|
@@ -476,30 +478,24 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None): | |
reso = curr_reso | ||
return reso | ||
|
||
US_RESO = 0 | ||
MS_RESO = 1 | ||
S_RESO = 2 | ||
T_RESO = 3 | ||
H_RESO = 4 | ||
D_RESO = 5 | ||
|
||
cdef inline int _reso_stamp(pandas_datetimestruct *dts): | ||
if dts.us != 0: | ||
if dts.us % 1000 == 0: | ||
return MS_RESO | ||
return US_RESO | ||
return frequencies.MS_RESO | ||
return frequencies.US_RESO | ||
elif dts.sec != 0: | ||
return S_RESO | ||
return frequencies.S_RESO | ||
elif dts.min != 0: | ||
return T_RESO | ||
return frequencies.T_RESO | ||
elif dts.hour != 0: | ||
return H_RESO | ||
return D_RESO | ||
return frequencies.H_RESO | ||
return frequencies.D_RESO | ||
|
||
cdef _reso_local(ndarray[int64_t] stamps, object tz): | ||
cdef: | ||
Py_ssize_t n = len(stamps) | ||
int reso = D_RESO, curr_reso | ||
int reso = frequencies.D_RESO, curr_reso | ||
ndarray[int64_t] trans, deltas, pos | ||
pandas_datetimestruct dts | ||
|
||
|
@@ -662,17 +658,13 @@ cdef class Period(object): | |
def _maybe_convert_freq(cls, object freq): | ||
|
||
if isinstance(freq, compat.string_types): | ||
from pandas.tseries.frequencies import _period_alias_dict | ||
freq = freq.upper() | ||
freq = _period_alias_dict.get(freq, freq) | ||
freq = frequencies._period_alias_dict.get(freq, freq) | ||
elif isinstance(freq, (int, tuple)): | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
from pandas.tseries.frequencies import _get_freq_str | ||
code, stride = _gfc(freq) | ||
freq = _get_freq_str(code, stride) | ||
code, stride = frequencies.get_freq_code(freq) | ||
freq = frequencies._get_freq_str(code, stride) | ||
|
||
from pandas.tseries.frequencies import to_offset | ||
freq = to_offset(freq) | ||
freq = frequencies.to_offset(freq) | ||
|
||
if freq.n <= 0: | ||
raise ValueError('Frequency must be positive, because it' | ||
|
@@ -691,9 +683,6 @@ cdef class Period(object): | |
def __init__(self, value=None, freq=None, ordinal=None, | ||
year=None, month=1, quarter=None, day=1, | ||
hour=0, minute=0, second=0): | ||
from pandas.tseries import frequencies | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
|
||
# freq points to a tuple (base, mult); base is one of the defined | ||
# periods such as A, Q, etc. Every five minutes would be, e.g., | ||
# ('T', 5) but may be passed in as a string like '5T' | ||
|
@@ -717,7 +706,7 @@ cdef class Period(object): | |
|
||
elif isinstance(value, Period): | ||
other = value | ||
if freq is None or _gfc(freq) == _gfc(other.freq): | ||
if freq is None or frequencies.get_freq_code(freq) == frequencies.get_freq_code(other.freq): | ||
ordinal = other.ordinal | ||
freq = other.freq | ||
else: | ||
|
@@ -758,7 +747,7 @@ cdef class Period(object): | |
msg = "Value must be Period, string, integer, or datetime" | ||
raise ValueError(msg) | ||
|
||
base, mult = _gfc(freq) | ||
base, mult = frequencies.get_freq_code(freq) | ||
|
||
if ordinal is None: | ||
self.ordinal = get_period_ordinal(dt.year, dt.month, dt.day, | ||
|
@@ -771,7 +760,6 @@ cdef class Period(object): | |
|
||
def __richcmp__(self, other, op): | ||
if isinstance(other, Period): | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
if other.freq != self.freq: | ||
msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) | ||
raise IncompatibleFrequency(msg) | ||
|
@@ -790,7 +778,6 @@ cdef class Period(object): | |
return hash((self.ordinal, self.freq)) | ||
|
||
def _add_delta(self, other): | ||
from pandas.tseries import frequencies | ||
if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)): | ||
offset = frequencies.to_offset(self.freq.rule_code) | ||
if isinstance(offset, offsets.Tick): | ||
|
@@ -868,10 +855,9 @@ cdef class Period(object): | |
------- | ||
resampled : Period | ||
""" | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
how = _validate_end_alias(how) | ||
base1, mult1 = _gfc(self.freq) | ||
base2, mult2 = _gfc(freq) | ||
base1, mult1 = frequencies.get_freq_code(self.freq) | ||
base2, mult2 = frequencies.get_freq_code(freq) | ||
|
||
if self.ordinal == tslib.iNaT: | ||
ordinal = self.ordinal | ||
|
@@ -918,23 +904,20 @@ cdef class Period(object): | |
------- | ||
Timestamp | ||
""" | ||
from pandas.tseries import frequencies | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
how = _validate_end_alias(how) | ||
|
||
if freq is None: | ||
base, mult = _gfc(self.freq) | ||
base, mult = frequencies.get_freq_code(self.freq) | ||
freq = frequencies.get_to_timestamp_base(base) | ||
|
||
base, mult = _gfc(freq) | ||
base, mult = frequencies.get_freq_code(freq) | ||
val = self.asfreq(freq, how) | ||
|
||
dt64 = period_ordinal_to_dt64(val.ordinal, base) | ||
return Timestamp(dt64, tz=tz) | ||
|
||
cdef _field(self, alias): | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
base, mult = _gfc(self.freq) | ||
base, mult = frequencies.get_freq_code(self.freq) | ||
return get_period_field(alias, self.ordinal, base) | ||
|
||
property year: | ||
|
@@ -996,8 +979,7 @@ cdef class Period(object): | |
return self.freq.freqstr | ||
|
||
def __repr__(self): | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
base, mult = _gfc(self.freq) | ||
base, mult = frequencies.get_freq_code(self.freq) | ||
formatted = period_format(self.ordinal, base) | ||
return "Period('%s', '%s')" % (formatted, self.freqstr) | ||
|
||
|
@@ -1008,8 +990,7 @@ cdef class Period(object): | |
Invoked by unicode(df) in py2 only. Yields a Unicode String in both | ||
py2/py3. | ||
""" | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
base, mult = _gfc(self.freq) | ||
base, mult = frequencies.get_freq_code(self.freq) | ||
formatted = period_format(self.ordinal, base) | ||
value = ("%s" % formatted) | ||
return value | ||
|
@@ -1159,15 +1140,13 @@ cdef class Period(object): | |
>>> a.strftime('%b. %d, %Y was a %A') | ||
'Jan. 01, 2001 was a Monday' | ||
""" | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
base, mult = _gfc(self.freq) | ||
base, mult = frequencies.get_freq_code(self.freq) | ||
return period_format(self.ordinal, base, fmt) | ||
|
||
|
||
def _ordinal_from_fields(year, month, quarter, day, hour, minute, | ||
second, freq): | ||
from pandas.tseries.frequencies import get_freq_code as _gfc | ||
base, mult = _gfc(freq) | ||
base, mult = frequencies.get_freq_code(freq) | ||
if quarter is not None: | ||
year, month = _quarter_to_myear(year, quarter, freq) | ||
|
||
|
@@ -1179,7 +1158,6 @@ def _quarter_to_myear(year, quarter, freq): | |
if quarter <= 0 or quarter > 4: | ||
raise ValueError('Quarter must be 1 <= q <= 4') | ||
|
||
from pandas.tseries import frequencies | ||
mnum = frequencies._month_numbers[frequencies._get_rule_month(freq)] + 1 | ||
month = (mnum + (quarter - 1) * 3) % 12 + 1 | ||
if month > mnum: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a benchmark for #11831 as well? (You can put it in this file even though it's plotting.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's quite possible there is already a benchmark for this in the plotting benchmarks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ahh right, there probably is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was
Python 3.5.1 :: Anaconda 2.5.0 (64-bit)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So I'm not committing any new benchmarks, but let me run them locally before and after the change...