Skip to content

PERF: speed up PeriodArray creation by exposing dayfirst/yearfirst params #24118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 31 additions & 7 deletions asv_bench/benchmarks/period.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pandas import (
DataFrame, Period, PeriodIndex, Series, date_range, period_range)
from pandas.tseries.frequencies import to_offset


class PeriodProperties(object):
Expand Down Expand Up @@ -35,25 +36,48 @@ def time_asfreq(self, freq):
self.per.asfreq('A')


class PeriodConstructor(object):
    """Benchmark construction of a single ``Period`` from a date string.

    Parameterized over ``freq`` (a frequency alias) and ``is_offset``
    (whether the frequency is supplied as the offset object returned by
    ``to_offset`` instead of the raw alias).
    """

    params = [['D'], [True, False]]
    param_names = ['freq', 'is_offset']

    def setup(self, freq, is_offset):
        """Store the frequency to benchmark with on ``self.freq``.

        Parameters
        ----------
        freq : str
            Frequency alias.
        is_offset : bool
            If True, convert ``freq`` with ``to_offset`` before storing it.
        """
        # Converting here keeps the to_offset() call out of the timed body.
        self.freq = to_offset(freq) if is_offset else freq

    def time_period_constructor(self, freq, is_offset):
        """Construct ``Period('2012-06-01')`` with the prepared frequency."""
        # Use the value prepared in setup(): passing the raw ``freq``
        # parameter would make the ``is_offset`` variants indistinguishable.
        Period('2012-06-01', freq=self.freq)


class PeriodIndexConstructor(object):
    """Benchmark ``PeriodIndex`` construction from several input types.

    Parameterized over ``freq`` (a frequency alias) and ``is_offset``
    (whether the frequency is supplied as the offset object returned by
    ``to_offset`` instead of the raw alias).
    """

    params = [['D'], [True, False]]
    param_names = ['freq', 'is_offset']

    def setup(self, freq, is_offset):
        """Build the input data and the frequency used by the benchmarks.

        Parameters
        ----------
        freq : str
            Frequency alias.
        is_offset : bool
            If True, convert ``freq`` with ``to_offset`` before storing it
            on ``self.freq``.
        """
        self.rng = date_range('1985', periods=1000)
        self.rng2 = date_range('1985', periods=1000).to_pydatetime()
        self.ints = list(range(2000, 3000))
        # Integers of the form YYYYMMDD, one per period of ``freq``.
        self.daily_ints = date_range('1/1/2000', periods=1000,
                                     freq=freq).strftime('%Y%m%d').map(int)
        # Converting here keeps the to_offset() call out of the timed bodies.
        self.freq = to_offset(freq) if is_offset else freq

    # Every timed method passes ``self.freq`` (prepared in setup) rather
    # than the raw ``freq`` parameter; otherwise the ``is_offset`` variants
    # would all time the same string-alias code path.

    def time_from_date_range(self, freq, is_offset):
        """Construct from the ``date_range`` result stored in ``self.rng``."""
        PeriodIndex(self.rng, freq=self.freq)

    def time_from_pydatetime(self, freq, is_offset):
        """Construct from the ``to_pydatetime()`` output in ``self.rng2``."""
        PeriodIndex(self.rng2, freq=self.freq)

    def time_from_ints(self, freq, is_offset):
        """Construct from the plain list of integers 2000..2999."""
        PeriodIndex(self.ints, freq=self.freq)

    def time_from_ints_daily(self, freq, is_offset):
        """Construct from the YYYYMMDD-formatted integers."""
        PeriodIndex(self.daily_ints, freq=self.freq)


class DataFramePeriodColumn(object):

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,7 @@ Performance Improvements
- Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`)
- Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators
without internally allocating lists of all elements (:issue:`20783`)
- Improved performance of :class:`Period` constructor, additionally benefitting ``PeriodArray`` and ``PeriodIndex`` creation (:issue:`24084` and :issue:`24118`)

.. _whatsnew_0240.docs:

Expand Down
15 changes: 14 additions & 1 deletion pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}

# ----------------------------------------------------------------------

_get_option = None


def get_option(param):
    """
    Return the value of option ``param`` via a lazily imported ``get_option``.

    The import from ``pandas.core.config`` is deferred until the first call
    and the imported function is then cached in the module-level
    ``_get_option``. Deferring it breaks an import cycle that caused
    significant performance degradation in Period construction. See
    GH#24118 for details.
    """
    global _get_option
    if _get_option is not None:
        # Already resolved on an earlier call; skip the import machinery.
        return _get_option(param)

    from pandas.core.config import get_option as _imported
    _get_option = _imported
    return _imported(param)


def parse_datetime_string(date_string, freq=None, dayfirst=False,
yearfirst=False, **kwargs):
Expand Down Expand Up @@ -117,7 +131,6 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
freq = freq.rule_code

if dayfirst is None or yearfirst is None:
from pandas.core.config import get_option
if dayfirst is None:
dayfirst = get_option("display.date_dayfirst")
if yearfirst is None:
Expand Down