-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: typing and cdefs for tslibs.resolution #21452
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ from cython cimport Py_ssize_t | |
|
||
import numpy as np | ||
cimport numpy as cnp | ||
from numpy cimport ndarray, int64_t | ||
from numpy cimport ndarray, int64_t, int32_t | ||
cnp.import_array() | ||
|
||
from util cimport is_string_object, get_nat | ||
|
@@ -44,12 +44,12 @@ cdef int RESO_MIN = 4 | |
cdef int RESO_HR = 5 | ||
cdef int RESO_DAY = 6 | ||
|
||
_ONE_MICRO = 1000L | ||
_ONE_MILLI = _ONE_MICRO * 1000 | ||
_ONE_SECOND = _ONE_MILLI * 1000 | ||
_ONE_MINUTE = 60 * _ONE_SECOND | ||
_ONE_HOUR = 60 * _ONE_MINUTE | ||
_ONE_DAY = 24 * _ONE_HOUR | ||
_ONE_MICRO = <int64_t>1000L | ||
_ONE_MILLI = <int64_t>(_ONE_MICRO * 1000) | ||
_ONE_SECOND = <int64_t>(_ONE_MILLI * 1000) | ||
_ONE_MINUTE = <int64_t>(60 * _ONE_SECOND) | ||
_ONE_HOUR = <int64_t>(60 * _ONE_MINUTE) | ||
_ONE_DAY = <int64_t>(24 * _ONE_HOUR) | ||
|
||
# ---------------------------------------------------------------------- | ||
|
||
|
@@ -349,7 +349,7 @@ class Resolution(object): | |
# TODO: this is non performant logic here (and duplicative) and this | ||
# simply should call unique_1d directly | ||
# plus no reason to depend on khash directly | ||
cdef unique_deltas(ndarray[int64_t] arr): | ||
cdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr): | ||
cdef: | ||
Py_ssize_t i, n = len(arr) | ||
int64_t val | ||
|
@@ -373,21 +373,27 @@ cdef unique_deltas(ndarray[int64_t] arr): | |
return result | ||
|
||
|
||
def _is_multiple(us, mult): | ||
cdef inline bint _is_multiple(int64_t us, int64_t mult): | ||
return us % mult == 0 | ||
|
||
|
||
def _maybe_add_count(base, count): | ||
cdef inline str _maybe_add_count(str base, count): | ||
if count != 1: | ||
return '{count}{base}'.format(count=int(count), base=base) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If above comment is correct shouldn't need the int cast here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like you're right. Just pushed a commit that added the suggested typing and removed the |
||
else: | ||
return base | ||
|
||
|
||
class _FrequencyInferer(object): | ||
cdef class _FrequencyInferer(object): | ||
""" | ||
Not sure if I can avoid the state machine here | ||
""" | ||
cdef public: | ||
index | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need types here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really viable since we can't specify There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just specify them as object to make this explict |
||
values | ||
bint warn | ||
bint is_monotonic | ||
dict _cache | ||
|
||
def __init__(self, index, warn=True): | ||
self.index = index | ||
|
@@ -475,16 +481,23 @@ class _FrequencyInferer(object): | |
def rep_stamp(self): | ||
return Timestamp(self.values[0]) | ||
|
||
def month_position_check(self): | ||
cdef month_position_check(self): | ||
# TODO: cythonize this, very slow | ||
calendar_end = True | ||
business_end = True | ||
calendar_start = True | ||
business_start = True | ||
|
||
years = self.fields['Y'] | ||
months = self.fields['M'] | ||
days = self.fields['D'] | ||
cdef: | ||
int32_t daysinmonth, y, m, d | ||
bint calendar_end = True | ||
bint business_end = True | ||
bint calendar_start = True | ||
bint business_start = True | ||
bint cal | ||
int32_t[:] years | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are memoryviews, right? Any reason you are choosing these instead of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My understanding is that memoryviews are encouraged because they are lighter weight. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jbrockmendel : That is indeed correct. I agree with this design choice. Might be something to look into for other parts of the code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general, one of the main problems with memoryviews is with read-only arrays (and for that reason we do/should not use them solely in certain algos), but I don't think that's relevant in this case There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can make cdefs readonly when it makes sense FYI |
||
int32_t[:] months | ||
int32_t[:] days | ||
|
||
fields = self.fields | ||
years = fields['Y'] | ||
months = fields['M'] | ||
days = fields['D'] | ||
weekdays = self.index.dayofweek | ||
|
||
from calendar import monthrange | ||
|
@@ -525,7 +538,7 @@ class _FrequencyInferer(object): | |
def ydiffs(self): | ||
return unique_deltas(self.fields['Y'].astype('i8')) | ||
|
||
def _infer_daily_rule(self): | ||
cdef _infer_daily_rule(self): | ||
annual_rule = self._get_annual_rule() | ||
if annual_rule: | ||
nyears = self.ydiffs[0] | ||
|
@@ -562,7 +575,7 @@ class _FrequencyInferer(object): | |
if wom_rule: | ||
return wom_rule | ||
|
||
def _get_annual_rule(self): | ||
cdef _get_annual_rule(self): | ||
if len(self.ydiffs) > 1: | ||
return None | ||
|
||
|
@@ -573,7 +586,7 @@ class _FrequencyInferer(object): | |
return {'cs': 'AS', 'bs': 'BAS', | ||
'ce': 'A', 'be': 'BA'}.get(pos_check) | ||
|
||
def _get_quarterly_rule(self): | ||
cdef _get_quarterly_rule(self): | ||
if len(self.mdiffs) > 1: | ||
return None | ||
|
||
|
@@ -584,14 +597,14 @@ class _FrequencyInferer(object): | |
return {'cs': 'QS', 'bs': 'BQS', | ||
'ce': 'Q', 'be': 'BQ'}.get(pos_check) | ||
|
||
def _get_monthly_rule(self): | ||
cdef _get_monthly_rule(self): | ||
if len(self.mdiffs) > 1: | ||
return None | ||
pos_check = self.month_position_check() | ||
return {'cs': 'MS', 'bs': 'BMS', | ||
'ce': 'M', 'be': 'BM'}.get(pos_check) | ||
|
||
def _is_business_daily(self): | ||
cdef bint _is_business_daily(self): | ||
# quick check: cannot be business daily | ||
if self.day_deltas != [1, 3]: | ||
return False | ||
|
@@ -604,7 +617,7 @@ class _FrequencyInferer(object): | |
return np.all(((weekdays == 0) & (shifts == 3)) | | ||
((weekdays > 0) & (weekdays <= 4) & (shifts == 1))) | ||
|
||
def _get_wom_rule(self): | ||
cdef _get_wom_rule(self): | ||
# wdiffs = unique(np.diff(self.index.week)) | ||
# We also need -47, -49, -48 to catch index spanning year boundary | ||
# if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add a int64_t type for
count
here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ill double-check, but I'm pretty sure past-me had a reason for leaving that one out. Maybe there are cases when
None
is passed