-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Move frequencies functions to cython #17746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
b77d09d
d551246
cb227dc
6532f76
b5198f3
3bfbdf1
4b8cb92
da8369f
74258c0
823db82
c1d8ec1
551f047
026056f
925aca9
92b1e9e
ae06978
2a2155e
4192c0d
21e13dc
43c971b
67dbfe2
0292fbe
22f46ac
893fef9
bcc5ca1
147bdc7
b54fe29
7bcdbe0
38b520c
3430925
0029303
957345b
49f0f72
4c200b2
aa0f988
1d12f09
6b57c23
9d0e70d
664d51e
e565a3a
2138285
7d3cd5c
cb73bd6
db3717b
cbefe3e
d1c67bc
f60fe4d
f250550
ae9f628
db44aa5
02e3217
23a3588
3769095
a7f063c
3ee6e92
64ef864
56d7949
6d62e04
1d71924
730b21b
010a7a5
a5d408e
8129b1c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,8 +32,11 @@ from tslibs.timezones cimport ( | |
is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz) | ||
from tslib cimport _nat_scalar_rules | ||
|
||
from tslibs.frequencies cimport ( | ||
get_freq_code, get_base_alias, get_to_timestamp_base, _get_freq_str, | ||
_get_rule_month) | ||
from tslibs.frequencies import _MONTH_NUMBERS | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. import from ccalendar? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If/when |
||
from tslibs.parsing import parse_time_string, NAT_SENTINEL | ||
from tslibs.frequencies cimport get_freq_code | ||
|
||
from pandas.tseries import offsets | ||
from pandas.tseries import frequencies | ||
|
@@ -665,7 +668,7 @@ cdef class _Period(object): | |
|
||
if isinstance(freq, (int, tuple)): | ||
code, stride = get_freq_code(freq) | ||
freq = frequencies._get_freq_str(code, stride) | ||
freq = _get_freq_str(code, stride) | ||
|
||
freq = frequencies.to_offset(freq) | ||
|
||
|
@@ -723,7 +726,7 @@ cdef class _Period(object): | |
raise IncompatibleFrequency(msg.format(self.freqstr)) | ||
elif isinstance(other, offsets.DateOffset): | ||
freqstr = other.rule_code | ||
base = frequencies.get_base_alias(freqstr) | ||
base = get_base_alias(freqstr) | ||
if base == self.freq.rule_code: | ||
ordinal = self.ordinal + other.n | ||
return Period(ordinal=ordinal, freq=self.freq) | ||
|
@@ -839,7 +842,7 @@ cdef class _Period(object): | |
|
||
if freq is None: | ||
base, mult = get_freq_code(self.freq) | ||
freq = frequencies.get_to_timestamp_base(base) | ||
freq = get_to_timestamp_base(base) | ||
|
||
base, mult = get_freq_code(freq) | ||
val = self.asfreq(freq, how) | ||
|
@@ -1232,7 +1235,7 @@ def _quarter_to_myear(year, quarter, freq): | |
if quarter <= 0 or quarter > 4: | ||
raise ValueError('Quarter must be 1 <= q <= 4') | ||
|
||
mnum = tslib._MONTH_NUMBERS[tslib._get_rule_month(freq)] + 1 | ||
mnum = _MONTH_NUMBERS[_get_rule_month(freq)] + 1 | ||
month = (mnum + (quarter - 1) * 3) % 12 + 1 | ||
if month > mnum: | ||
year -= 1 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,6 +92,7 @@ from tslibs.timezones cimport ( | |
treat_tz_as_dateutil, treat_tz_as_pytz, | ||
get_timezone, get_utcoffset, maybe_get_tz, | ||
get_dst_info) | ||
from tslibs.frequencies cimport _get_rule_month | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so this is prob unnecessary There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like. Removing. |
||
from tslibs.fields import ( | ||
get_date_name_field, get_start_end_field, get_date_field, | ||
build_field_sarray) | ||
|
@@ -1950,27 +1951,6 @@ _MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)} | |
_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)} | ||
|
||
|
||
cpdef object _get_rule_month(object source, object default='DEC'): | ||
""" | ||
Return starting month of given freq, default is December. | ||
|
||
Example | ||
------- | ||
>>> _get_rule_month('D') | ||
'DEC' | ||
|
||
>>> _get_rule_month('A-JAN') | ||
'JAN' | ||
""" | ||
if hasattr(source, 'freqstr'): | ||
source = source.freqstr | ||
source = source.upper() | ||
if '-' not in source: | ||
return default | ||
else: | ||
return source.split('-')[1] | ||
|
||
|
||
cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): | ||
""" | ||
convert the ndarray according to the unit | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
# -*- coding: utf-8 -*- | ||
# cython: profile=False | ||
|
||
cpdef object _get_rule_month(object source, object default=*) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is the main one to de-privatize |
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are these used anywhere but in period.pyx (for cython usage)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a duplicate of it in tslibs.parsing that should be replaced with a cimport. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (things will be easier on my end if we can clean those up in a follow-up; I'm guessing you'll also suggest de-privatizing _get_rule_month) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's fine, pls add to the list. |
||
cpdef get_freq_code(freqstr) | ||
cpdef get_freq(freq) | ||
cpdef get_base_alias(freqstr) | ||
cpdef get_to_timestamp_base(base) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should de-privatize get_freq_str at some point (add to list) |
||
cpdef _get_freq_str(base, mult=*) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,9 +6,32 @@ cimport cython | |
|
||
import numpy as np | ||
cimport numpy as np | ||
from numpy cimport int64_t | ||
np.import_array() | ||
|
||
from util cimport is_integer_object | ||
from util cimport is_integer_object, is_string_object | ||
|
||
|
||
#---------------------------------------------------------------------- | ||
cpdef object _get_rule_month(object source, object default='DEC'): | ||
""" | ||
Return starting month of given freq, default is December. | ||
|
||
Example | ||
------- | ||
>>> _get_rule_month('D') | ||
'DEC' | ||
|
||
>>> _get_rule_month('A-JAN') | ||
'JAN' | ||
""" | ||
if hasattr(source, 'freqstr'): | ||
source = source.freqstr | ||
source = source.upper() | ||
if '-' not in source: | ||
return default | ||
else: | ||
return source.split('-')[1] | ||
|
||
|
||
cpdef get_freq_code(freqstr): | ||
|
@@ -202,3 +225,242 @@ cpdef _period_str_to_code(freqstr): | |
return _period_code_map[freqstr] | ||
except KeyError: | ||
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) | ||
|
||
|
||
cpdef _get_freq_str(base, mult=1): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. de-privatize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do. |
||
code = _reverse_period_code_map.get(base) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a doc-string |
||
if mult == 1: | ||
return code | ||
return str(mult) + code | ||
|
||
|
||
cpdef get_base_alias(freqstr): | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add Parameters, Returns There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. return str? (or do we just use object) |
||
Returns the base frequency alias, e.g., '5D' -> 'D' | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add parameters/returns. |
||
return _base_and_stride(freqstr)[0] | ||
|
||
|
||
class FreqGroup(object): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you move this class to the top |
||
FR_ANN = 1000 | ||
FR_QTR = 2000 | ||
FR_MTH = 3000 | ||
FR_WK = 4000 | ||
FR_BUS = 5000 | ||
FR_DAY = 6000 | ||
FR_HR = 7000 | ||
FR_MIN = 8000 | ||
FR_SEC = 9000 | ||
FR_MS = 10000 | ||
FR_US = 11000 | ||
FR_NS = 12000 | ||
|
||
|
||
cpdef get_to_timestamp_base(base): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. at some point add types to these (and Parameters in doc-string). if we are not actually calling from python change to cdef (these can be added to list or done here) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To the list they go. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. type |
||
""" | ||
Return frequency code group used for base of to_timestamp against | ||
frequency code. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add Parameters, Returns |
||
Example | ||
------- | ||
# Return day freq code against longer freq than day | ||
>>> get_to_timestamp_base(get_freq_code('D')[0]) | ||
6000 | ||
>>> get_to_timestamp_base(get_freq_code('W')[0]) | ||
6000 | ||
>>> get_to_timestamp_base(get_freq_code('M')[0]) | ||
6000 | ||
|
||
# Return second freq code against hour between second | ||
>>> get_to_timestamp_base(get_freq_code('H')[0]) | ||
9000 | ||
>>> get_to_timestamp_base(get_freq_code('S')[0]) | ||
9000 | ||
""" | ||
if base < FreqGroup.FR_BUS: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ? |
||
return FreqGroup.FR_DAY | ||
if FreqGroup.FR_HR <= base <= FreqGroup.FR_SEC: | ||
return FreqGroup.FR_SEC | ||
return base | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a condition for base < FR_NS. (and make these if/else). the final entry should be a raise (as base is out of range). |
||
|
||
|
||
cpdef get_freq(freq): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. type There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would have to be object in and object out. Does that actually help? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes it helps from a readability perspective |
||
""" | ||
Return frequency code of given frequency str. | ||
If input is not string, return input as it is. | ||
|
||
Example | ||
------- | ||
>>> get_freq('A') | ||
1000 | ||
|
||
>>> get_freq('3A') | ||
1000 | ||
""" | ||
if is_string_object(freq): | ||
base, mult = get_freq_code(freq) | ||
freq = base | ||
return freq | ||
|
||
|
||
#---------------------------------------------------------------------- | ||
# Frequency comparison | ||
|
||
def is_subperiod(source, target): | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prob can be cpdef? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. type There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cpdef bool |
||
Returns True if downsampling is possible between source and target | ||
frequencies | ||
|
||
Parameters | ||
---------- | ||
source : string | ||
Frequency converting from | ||
target : string | ||
Frequency converting to | ||
|
||
Returns | ||
------- | ||
is_subperiod : boolean | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. later on can you come back and add examples to functions w/o |
||
|
||
if target is None or source is None: | ||
return False | ||
source = _maybe_coerce_freq(source) | ||
target = _maybe_coerce_freq(target) | ||
|
||
if _is_annual(target): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. want to de-privatize these (again can add to list) |
||
if _is_quarterly(source): | ||
return _quarter_months_conform(_get_rule_month(source), | ||
_get_rule_month(target)) | ||
return source in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif _is_quarterly(target): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this could be encoded in a dict (the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are used outside of _libs and aren't too strict about typing. Do we want passing something non-hashable to raise? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wasn't talking about the |
||
return source in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif _is_monthly(target): | ||
return source in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif _is_weekly(target): | ||
return source in [target, 'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif target == 'B': | ||
return source in ['B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif target == 'C': | ||
return source in ['C', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif target == 'D': | ||
return source in ['D', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif target == 'H': | ||
return source in ['H', 'T', 'S', 'L', 'U', 'N'] | ||
elif target == 'T': | ||
return source in ['T', 'S', 'L', 'U', 'N'] | ||
elif target == 'S': | ||
return source in ['S', 'L', 'U', 'N'] | ||
elif target == 'L': | ||
return source in ['L', 'U', 'N'] | ||
elif target == 'U': | ||
return source in ['U', 'N'] | ||
elif target == 'N': | ||
return source in ['N'] | ||
|
||
|
||
def is_superperiod(source, target): | ||
""" | ||
Returns True if upsampling is possible between source and target | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
frequencies | ||
|
||
Parameters | ||
---------- | ||
source : string | ||
Frequency converting from | ||
target : string | ||
Frequency converting to | ||
|
||
Returns | ||
------- | ||
is_superperiod : boolean | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
""" | ||
if target is None or source is None: | ||
return False | ||
source = _maybe_coerce_freq(source) | ||
target = _maybe_coerce_freq(target) | ||
|
||
if _is_annual(source): | ||
if _is_annual(target): | ||
return _get_rule_month(source) == _get_rule_month(target) | ||
|
||
if _is_quarterly(target): | ||
smonth = _get_rule_month(source) | ||
tmonth = _get_rule_month(target) | ||
return _quarter_months_conform(smonth, tmonth) | ||
return target in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif _is_quarterly(source): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same (maybe even same dict) |
||
return target in ['D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif _is_monthly(source): | ||
return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif _is_weekly(source): | ||
return target in [source, 'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif source == 'B': | ||
return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif source == 'C': | ||
return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif source == 'D': | ||
return target in ['D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'] | ||
elif source == 'H': | ||
return target in ['H', 'T', 'S', 'L', 'U', 'N'] | ||
elif source == 'T': | ||
return target in ['T', 'S', 'L', 'U', 'N'] | ||
elif source == 'S': | ||
return target in ['S', 'L', 'U', 'N'] | ||
elif source == 'L': | ||
return target in ['L', 'U', 'N'] | ||
elif source == 'U': | ||
return target in ['U', 'N'] | ||
elif source == 'N': | ||
return target in ['N'] | ||
|
||
|
||
def _maybe_coerce_freq(code): | ||
""" we might need to coerce a code to a rule_code | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cpdef? type |
||
and uppercase it | ||
|
||
Parameters | ||
---------- | ||
source : string | ||
Frequency converting from | ||
|
||
Returns | ||
------- | ||
string code | ||
""" | ||
assert code is not None | ||
if getattr(code, '_typ', None) == 'dateoffset': | ||
# i.e. isinstance(code, ABCDateOffset): | ||
code = code.rule_code | ||
return code.upper() | ||
|
||
|
||
_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', | ||
'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] | ||
_MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)} | ||
|
||
|
||
def _quarter_months_conform(source, target): | ||
snum = _MONTH_NUMBERS[source] | ||
tnum = _MONTH_NUMBERS[target] | ||
return snum % 3 == tnum % 3 | ||
|
||
|
||
def _is_annual(rule): | ||
rule = rule.upper() | ||
return rule == 'A' or rule.startswith('A-') | ||
|
||
|
||
def _is_quarterly(rule): | ||
rule = rule.upper() | ||
return rule == 'Q' or rule.startswith('Q-') or rule.startswith('BQ') | ||
|
||
|
||
def _is_monthly(rule): | ||
rule = rule.upper() | ||
return rule == 'M' or rule == 'BM' | ||
|
||
|
||
def _is_weekly(rule): | ||
rule = rule.upper() | ||
return rule == 'W' or rule.startswith('W-') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can't this be a cimport?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment
_MONTH_NUMBERS
is not cdef'd. It is imported by tseries.frequencies, but not used there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh, then take it out