Skip to content

PERF: typing and cdefs for tslibs.resolution #21452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 14, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 42 additions & 29 deletions pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ from cython cimport Py_ssize_t

import numpy as np
cimport numpy as cnp
from numpy cimport ndarray, int64_t
from numpy cimport ndarray, int64_t, int32_t
cnp.import_array()

from util cimport is_string_object, get_nat
Expand Down Expand Up @@ -44,12 +44,12 @@ cdef int RESO_MIN = 4
cdef int RESO_HR = 5
cdef int RESO_DAY = 6

_ONE_MICRO = 1000L
_ONE_MILLI = _ONE_MICRO * 1000
_ONE_SECOND = _ONE_MILLI * 1000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR
_ONE_MICRO = <int64_t>1000L
_ONE_MILLI = <int64_t>(_ONE_MICRO * 1000)
_ONE_SECOND = <int64_t>(_ONE_MILLI * 1000)
_ONE_MINUTE = <int64_t>(60 * _ONE_SECOND)
_ONE_HOUR = <int64_t>(60 * _ONE_MINUTE)
_ONE_DAY = <int64_t>(24 * _ONE_HOUR)

# ----------------------------------------------------------------------

Expand Down Expand Up @@ -349,7 +349,7 @@ class Resolution(object):
# TODO: this is non performant logic here (and duplicative) and this
# simply should call unique_1d directly
# plus no reason to depend on khash directly
cdef unique_deltas(ndarray[int64_t] arr):
cdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
cdef:
Py_ssize_t i, n = len(arr)
int64_t val
Expand All @@ -373,21 +373,27 @@ cdef unique_deltas(ndarray[int64_t] arr):
return result


def _is_multiple(us, mult):
cdef inline bint _is_multiple(int64_t us, int64_t mult):
return us % mult == 0


def _maybe_add_count(base, count):
cdef inline str _maybe_add_count(str base, int64_t count):
if count != 1:
return '{count}{base}'.format(count=int(count), base=base)
return '{count}{base}'.format(count=count, base=base)
else:
return base


class _FrequencyInferer(object):
cdef class _FrequencyInferer(object):
"""
Not sure if I can avoid the state machine here
"""
cdef public:
object index
object values
bint warn
bint is_monotonic
dict _cache

def __init__(self, index, warn=True):
self.index = index
Expand Down Expand Up @@ -475,16 +481,23 @@ class _FrequencyInferer(object):
def rep_stamp(self):
return Timestamp(self.values[0])

def month_position_check(self):
cdef month_position_check(self):
# TODO: cythonize this, very slow
calendar_end = True
business_end = True
calendar_start = True
business_start = True

years = self.fields['Y']
months = self.fields['M']
days = self.fields['D']
cdef:
int32_t daysinmonth, y, m, d
bint calendar_end = True
bint business_end = True
bint calendar_start = True
bint business_start = True
bint cal
int32_t[:] years
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are memoryviews, right? Any reason you are choosing these instead of the ndarray declarations throughout the rest of the code base?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is that memoryviews are encouraged because they are lighter weight.

Copy link
Member

@gfyoung gfyoung Jun 13, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel : That is indeed correct. I agree with this design choice.

Might be something to look into for other parts of the code.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, one of the main problems with memoryviews is with read-only arrays (and for that reason we do/should not use them solely in certain algos), but I don't think that's relevant in this case

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can make cdefs readonly when it makes sense FYI

int32_t[:] months
int32_t[:] days

fields = self.fields
years = fields['Y']
months = fields['M']
days = fields['D']
weekdays = self.index.dayofweek

from calendar import monthrange
Expand Down Expand Up @@ -525,7 +538,7 @@ class _FrequencyInferer(object):
def ydiffs(self):
return unique_deltas(self.fields['Y'].astype('i8'))

def _infer_daily_rule(self):
cdef _infer_daily_rule(self):
annual_rule = self._get_annual_rule()
if annual_rule:
nyears = self.ydiffs[0]
Expand Down Expand Up @@ -562,7 +575,7 @@ class _FrequencyInferer(object):
if wom_rule:
return wom_rule

def _get_annual_rule(self):
cdef _get_annual_rule(self):
if len(self.ydiffs) > 1:
return None

Expand All @@ -573,7 +586,7 @@ class _FrequencyInferer(object):
return {'cs': 'AS', 'bs': 'BAS',
'ce': 'A', 'be': 'BA'}.get(pos_check)

def _get_quarterly_rule(self):
cdef _get_quarterly_rule(self):
if len(self.mdiffs) > 1:
return None

Expand All @@ -584,14 +597,14 @@ class _FrequencyInferer(object):
return {'cs': 'QS', 'bs': 'BQS',
'ce': 'Q', 'be': 'BQ'}.get(pos_check)

def _get_monthly_rule(self):
cdef _get_monthly_rule(self):
if len(self.mdiffs) > 1:
return None
pos_check = self.month_position_check()
return {'cs': 'MS', 'bs': 'BMS',
'ce': 'M', 'be': 'BM'}.get(pos_check)

def _is_business_daily(self):
cdef bint _is_business_daily(self):
# quick check: cannot be business daily
if self.day_deltas != [1, 3]:
return False
Expand All @@ -604,7 +617,7 @@ class _FrequencyInferer(object):
return np.all(((weekdays == 0) & (shifts == 3)) |
((weekdays > 0) & (weekdays <= 4) & (shifts == 1)))

def _get_wom_rule(self):
cdef _get_wom_rule(self):
# wdiffs = unique(np.diff(self.index.week))
# We also need -47, -49, -48 to catch index spanning year boundary
# if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
Expand All @@ -627,9 +640,9 @@ class _FrequencyInferer(object):
return 'WOM-{week}{weekday}'.format(week=week, weekday=wd)


class _TimedeltaFrequencyInferer(_FrequencyInferer):
cdef class _TimedeltaFrequencyInferer(_FrequencyInferer):

def _infer_daily_rule(self):
cdef _infer_daily_rule(self):
if self.is_unique:
days = self.deltas[0] / _ONE_DAY
if days % 7 == 0:
Expand Down