diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py
index 8f341c8b415fe..6d2c7156a0a3d 100644
--- a/asv_bench/benchmarks/period.py
+++ b/asv_bench/benchmarks/period.py
@@ -1,5 +1,6 @@
 from pandas import (
     DataFrame, Period, PeriodIndex, Series, date_range, period_range)
+from pandas.tseries.frequencies import to_offset
 
 
 class PeriodProperties(object):
@@ -35,25 +36,51 @@ def time_asfreq(self, freq):
         self.per.asfreq('A')
 
 
+class PeriodConstructor(object):
+    # ``is_offset`` selects whether the constructor receives the frequency
+    # as a string alias or as an offset object built by ``to_offset``.
+    params = [['D'], [True, False]]
+    param_names = ['freq', 'is_offset']
+
+    def setup(self, freq, is_offset):
+        if is_offset:
+            self.freq = to_offset(freq)
+        else:
+            self.freq = freq
+
+    def time_period_constructor(self, freq, is_offset):
+        Period('2012-06-01', freq=self.freq)
+
+
 class PeriodIndexConstructor(object):
 
-    params = ['D']
-    param_names = ['freq']
+    params = [['D'], [True, False]]
+    param_names = ['freq', 'is_offset']
 
-    def setup(self, freq):
+    def setup(self, freq, is_offset):
         self.rng = date_range('1985', periods=1000)
         self.rng2 = date_range('1985', periods=1000).to_pydatetime()
         self.ints = list(range(2000, 3000))
-
-    def time_from_date_range(self, freq):
-        PeriodIndex(self.rng, freq=freq)
+        self.daily_ints = date_range('1/1/2000', periods=1000,
+                                     freq=freq).strftime('%Y%m%d').map(int)
+        # Timed methods pass ``self.freq`` so ``is_offset`` takes effect.
+        if is_offset:
+            self.freq = to_offset(freq)
+        else:
+            self.freq = freq
+
+    def time_from_date_range(self, freq, is_offset):
+        PeriodIndex(self.rng, freq=self.freq)
 
-    def time_from_pydatetime(self, freq):
-        PeriodIndex(self.rng2, freq=freq)
+    def time_from_pydatetime(self, freq, is_offset):
+        PeriodIndex(self.rng2, freq=self.freq)
 
-    def time_from_ints(self, freq):
-        PeriodIndex(self.ints, freq=freq)
+    def time_from_ints(self, freq, is_offset):
+        PeriodIndex(self.ints, freq=self.freq)
 
+    def time_from_ints_daily(self, freq, is_offset):
+        PeriodIndex(self.daily_ints, freq=self.freq)
+
 
 class DataFramePeriodColumn(object):
 
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 29ab51c582a97..affef80571fce 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1294,6 +1294,7 @@ Performance Improvements
 - Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`)
 - Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators
   without internally allocating lists of all elements (:issue:`20783`)
+- Improved performance of :class:`Period` constructor, additionally benefitting ``PeriodArray`` and ``PeriodIndex`` creation (:issue:`24084` and :issue:`24118`)
 
 .. _whatsnew_0240.docs:
 
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 37f11af81dfd6..3a03018141f5a 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -47,6 +47,24 @@ cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
 
 # ----------------------------------------------------------------------
 
+_get_option = None
+
+
+def get_option(param):
+    """
+    Look up a pandas option through a lazily imported ``get_option``.
+
+    The import of ``pandas.core.config.get_option`` is deferred to the first
+    call and cached in the module-level ``_get_option``. This breaks an import
+    cycle that caused significant performance degradation in Period
+    construction. See GH#24118 for details.
+    """
+    global _get_option
+    if _get_option is None:
+        from pandas.core.config import get_option
+        _get_option = get_option
+    return _get_option(param)
+
 
 def parse_datetime_string(date_string, freq=None, dayfirst=False,
                           yearfirst=False, **kwargs):
@@ -117,7 +135,6 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
         freq = freq.rule_code
 
     if dayfirst is None or yearfirst is None:
-        from pandas.core.config import get_option
         if dayfirst is None:
             dayfirst = get_option("display.date_dayfirst")
         if yearfirst is None: