diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py new file mode 100644 index 0000000000000..012030a71ac82 --- /dev/null +++ b/asv_bench/benchmarks/period.py @@ -0,0 +1,9 @@ +from pandas import PeriodIndex, date_range + + +class create_period_index_from_date_range(object): + goal_time = 0.2 + + def time_period_index(self): + # Simulate irregular PeriodIndex + PeriodIndex(date_range('1985', periods=1000).to_pydatetime(), freq='D') diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index cc84347313b42..d79bbfbe2e390 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -243,7 +243,7 @@ Performance Improvements - Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`) - +- Improved performance of ``Period`` construction and plotting of ``Period``s. (:issue:`12903`, :issue:`11831`) diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index 33c213ac5d8df..e5802ccef7495 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -36,12 +36,20 @@ from tslib cimport ( _nat_scalar_rules, ) +from pandas.tseries import frequencies + from sys import version_info cdef bint PY2 = version_info[0] == 2 cdef int64_t NPY_NAT = util.get_nat() +cdef int US_RESO = frequencies.US_RESO +cdef int MS_RESO = frequencies.MS_RESO +cdef int S_RESO = frequencies.S_RESO +cdef int T_RESO = frequencies.T_RESO +cdef int H_RESO = frequencies.H_RESO +cdef int D_RESO = frequencies.D_RESO cdef extern from "period_helper.h": ctypedef struct date_info: @@ -476,12 +484,6 @@ cpdef resolution(ndarray[int64_t] stamps, tz=None): reso = curr_reso return reso -US_RESO = 0 -MS_RESO = 1 -S_RESO = 2 -T_RESO = 3 -H_RESO = 4 -D_RESO = 5 cdef inline int _reso_stamp(pandas_datetimestruct *dts): if dts.us != 0: @@ -662,17 +664,13 @@ cdef class Period(object): def _maybe_convert_freq(cls, object freq): if isinstance(freq, compat.string_types): - from pandas.tseries.frequencies import _period_alias_dict freq = freq.upper() - freq = _period_alias_dict.get(freq, freq) + freq = frequencies._period_alias_dict.get(freq, freq) elif isinstance(freq, (int, tuple)): - from pandas.tseries.frequencies import get_freq_code as _gfc - from pandas.tseries.frequencies import _get_freq_str - code, stride = _gfc(freq) - freq = _get_freq_str(code, stride) + code, stride = frequencies.get_freq_code(freq) + freq = frequencies._get_freq_str(code, stride) - from pandas.tseries.frequencies import to_offset - freq = to_offset(freq) + freq = frequencies.to_offset(freq) if freq.n <= 0: raise ValueError('Frequency must be positive, because it' @@ -691,9 +689,6 @@ cdef class Period(object): def __init__(self, value=None, freq=None, ordinal=None, year=None, month=1, quarter=None, day=1, hour=0, minute=0, second=0): - from pandas.tseries import frequencies - from pandas.tseries.frequencies import get_freq_code as _gfc - # freq points to a tuple (base, mult); base is one of the defined # periods such as A, Q, etc. Every five minutes would be, e.g., # ('T', 5) but may be passed in as a string like '5T' @@ -717,7 +712,7 @@ cdef class Period(object): elif isinstance(value, Period): other = value - if freq is None or _gfc(freq) == _gfc(other.freq): + if freq is None or frequencies.get_freq_code(freq) == frequencies.get_freq_code(other.freq): ordinal = other.ordinal freq = other.freq else: @@ -758,7 +753,7 @@ cdef class Period(object): msg = "Value must be Period, string, integer, or datetime" raise ValueError(msg) - base, mult = _gfc(freq) + base, mult = frequencies.get_freq_code(freq) if ordinal is None: self.ordinal = get_period_ordinal(dt.year, dt.month, dt.day, @@ -771,7 +766,6 @@ cdef class Period(object): def __richcmp__(self, other, op): if isinstance(other, Period): - from pandas.tseries.frequencies import get_freq_code as _gfc if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -790,7 +784,6 @@ cdef class Period(object): return hash((self.ordinal, self.freq)) def _add_delta(self, other): - from pandas.tseries import frequencies if isinstance(other, (timedelta, np.timedelta64, offsets.Tick, Timedelta)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): @@ -868,10 +861,9 @@ cdef class Period(object): ------- resampled : Period """ - from pandas.tseries.frequencies import get_freq_code as _gfc how = _validate_end_alias(how) - base1, mult1 = _gfc(self.freq) - base2, mult2 = _gfc(freq) + base1, mult1 = frequencies.get_freq_code(self.freq) + base2, mult2 = frequencies.get_freq_code(freq) if self.ordinal == tslib.iNaT: ordinal = self.ordinal @@ -918,23 +910,20 @@ cdef class Period(object): ------- Timestamp """ - from pandas.tseries import frequencies - from pandas.tseries.frequencies import get_freq_code as _gfc how = _validate_end_alias(how) if freq is None: - base, mult = _gfc(self.freq) + base, mult = frequencies.get_freq_code(self.freq) freq = frequencies.get_to_timestamp_base(base) - base, mult = _gfc(freq) + base, mult = frequencies.get_freq_code(freq) val = self.asfreq(freq, how) dt64 = period_ordinal_to_dt64(val.ordinal, base) return Timestamp(dt64, tz=tz) cdef _field(self, alias): - from pandas.tseries.frequencies import get_freq_code as _gfc - base, mult = _gfc(self.freq) + base, mult = frequencies.get_freq_code(self.freq) return get_period_field(alias, self.ordinal, base) property year: @@ -996,8 +985,7 @@ cdef class Period(object): return self.freq.freqstr def __repr__(self): - from pandas.tseries.frequencies import get_freq_code as _gfc - base, mult = _gfc(self.freq) + base, mult = frequencies.get_freq_code(self.freq) formatted = period_format(self.ordinal, base) return "Period('%s', '%s')" % (formatted, self.freqstr) @@ -1008,8 +996,7 @@ cdef class Period(object): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ - from pandas.tseries.frequencies import get_freq_code as _gfc - base, mult = _gfc(self.freq) + base, mult = frequencies.get_freq_code(self.freq) formatted = period_format(self.ordinal, base) value = ("%s" % formatted) return value @@ -1159,15 +1146,13 @@ cdef class Period(object): >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 01, 2001 was a Monday' """ - from pandas.tseries.frequencies import get_freq_code as _gfc - base, mult = _gfc(self.freq) + base, mult = frequencies.get_freq_code(self.freq) return period_format(self.ordinal, base, fmt) def _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq): - from pandas.tseries.frequencies import get_freq_code as _gfc - base, mult = _gfc(freq) + base, mult = frequencies.get_freq_code(freq) if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) @@ -1179,7 +1164,6 @@ def _quarter_to_myear(year, quarter, freq): if quarter <= 0 or quarter > 4: raise ValueError('Quarter must be 1 <= q <= 4') - from pandas.tseries import frequencies mnum = frequencies._month_numbers[frequencies._get_rule_month(freq)] + 1 month = (mnum + (quarter - 1) * 3) % 12 + 1 if month > mnum: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 058a8db9ead08..b053f455d7f4b 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -14,7 +14,6 @@ import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib -import pandas._period as period from pandas.tslib import Timedelta from pytz import AmbiguousTimeError @@ -34,16 +33,24 @@ class FreqGroup(object): FR_NS = 12000 +US_RESO = 0 +MS_RESO = 1 +S_RESO = 2 +T_RESO = 3 +H_RESO = 4 +D_RESO = 5 + + class Resolution(object): # defined in period.pyx # note that these are different from freq codes - RESO_US = period.US_RESO - RESO_MS = period.MS_RESO - RESO_SEC = period.S_RESO - RESO_MIN = period.T_RESO - RESO_HR = period.H_RESO - RESO_DAY = period.D_RESO + RESO_US = US_RESO + RESO_MS = MS_RESO + RESO_SEC = S_RESO + RESO_MIN = T_RESO + RESO_HR = H_RESO + RESO_DAY = D_RESO _reso_str_map = { RESO_US: 'microsecond', diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 863bc6f630d06..053c59fcbc7a5 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -11,7 +11,10 @@ from pandas.tslib import get_timezone from pandas._period import period_asfreq, period_ordinal from pandas.tseries.index import date_range, DatetimeIndex -from pandas.tseries.frequencies import get_freq +from pandas.tseries.frequencies import ( + get_freq, + US_RESO, MS_RESO, S_RESO, H_RESO, D_RESO, T_RESO +) import pandas.tseries.tools as tools import pandas.tseries.offsets as offsets import pandas.util.testing as tm @@ -1307,11 +1310,11 @@ def test_resolution(self): for freq, expected in zip(['A', 'Q', 'M', 'D', 'H', 'T', 'S', 'L', 'U'], - [period.D_RESO, period.D_RESO, - period.D_RESO, period.D_RESO, - period.H_RESO, period.T_RESO, - period.S_RESO, period.MS_RESO, - period.US_RESO]): + [D_RESO, D_RESO, + D_RESO, D_RESO, + H_RESO, T_RESO, + S_RESO, MS_RESO, + US_RESO]): for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Eastern']: idx = date_range(start='2013-04-01', periods=30, freq=freq,