Skip to content

Commit 239acb2

Browse files
jbrockmendel authored and alanbato committed
PERF: Implement get_freq_code in cython frequencies (pandas-dev#17422)
1 parent fc07432 commit 239acb2

File tree

5 files changed

+235
-78
lines changed

5 files changed

+235
-78
lines changed

asv_bench/benchmarks/period.py

+29
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,35 @@
22
from pandas import Series, Period, PeriodIndex, date_range
33

44

5+
class PeriodProperties(object):
    """
    Time attribute access on a single monthly ``Period``.

    Each ``time_*`` method reads exactly one attribute of the ``Period``
    created in ``setup`` and discards the value, so the measurement is
    dominated by the attribute lookup itself.
    """

    def setup(self):
        # Built once here so construction cost is not part of the timings.
        self.per = Period('2012-06-01', freq='M')

    def time_year(self):
        self.per.year

    def time_month(self):
        self.per.month

    def time_quarter(self):
        self.per.quarter

    def time_day(self):
        self.per.day

    def time_hour(self):
        self.per.hour

    def time_minute(self):
        # Previously read ``.second`` (copy-paste of time_second), so the
        # minute accessor was never actually benchmarked.
        self.per.minute

    def time_second(self):
        self.per.second

    def time_leap_year(self):
        # The public attribute is ``is_leap_year``; ``is_leapyear`` does not
        # exist on Period and raised AttributeError.
        self.per.is_leap_year
32+
33+
534
class Constructor(object):
635
goal_time = 0.2
736

pandas/_libs/tslibs/__init__.py

Whitespace-only changes.

pandas/_libs/tslibs/frequencies.pyx

+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
# -*- coding: utf-8 -*-
2+
# cython: profile=False
3+
import re
4+
5+
cimport cython
6+
7+
import numpy as np
8+
cimport numpy as np
9+
np.import_array()
10+
11+
from util cimport is_integer_object
12+
13+
14+
cpdef get_freq_code(freqstr):
    """
    Return freq str or tuple to freq code and stride (mult)

    Parameters
    ----------
    freqstr : str, int, tuple, or object whose ``_typ`` is 'dateoffset'
        * str: an optional numeric stride followed by a frequency alias,
          e.g. '3D'.
        * int: taken to already be a frequency code; the stride is 1.
        * tuple: either ``(code, stride)`` with both integers (returned
          unchanged), or an alias/stride pair in either order.
        * dateoffset-like: converted to ``(rule_code, n)`` first.

    Returns
    -------
    return : tuple of base frequency code and stride (mult)

    Raises
    ------
    ValueError
        If the frequency alias is not recognised or the string cannot be
        parsed.

    Example
    -------
    >>> get_freq_code('3D')
    (6000, 3)

    >>> get_freq_code('D')
    (6000, 1)

    >>> get_freq_code(('D', 3))
    (6000, 3)
    """
    # Duck-typed check on `_typ` instead of isinstance, so this module does
    # not need to import the DateOffset class.
    if getattr(freqstr, '_typ', None) == 'dateoffset':
        freqstr = (freqstr.rule_code, freqstr.n)

    if isinstance(freqstr, tuple):
        if (is_integer_object(freqstr[0]) and
                is_integer_object(freqstr[1])):
            # e.g., freqstr = (2000, 1)
            return freqstr
        else:
            # e.g., freqstr = ('T', 5)
            try:
                code = _period_str_to_code(freqstr[0])
                stride = freqstr[1]
            except Exception:
                # Only ordinary errors trigger the swapped-order retry;
                # KeyboardInterrupt / SystemExit now propagate untouched.
                if is_integer_object(freqstr[1]):
                    # Second element is the stride, so the first really was
                    # meant as the alias: the original error stands.
                    raise
                # Otherwise assume the tuple was given as (stride, alias).
                code = _period_str_to_code(freqstr[1])
                stride = freqstr[0]
            return code, stride

    if is_integer_object(freqstr):
        return (freqstr, 1)

    base, stride = _base_and_stride(freqstr)
    code = _period_str_to_code(base)

    return code, stride
64+
65+
66+
# hack to handle WOM-1MON
67+
opattern = re.compile(
68+
r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)'
69+
)
70+
71+
72+
cpdef _base_and_stride(freqstr):
    """
    Split a frequency string into its base alias and integer stride.

    A string with no numeric prefix gets a stride of 1.

    Examples
    --------
    _base_and_stride('5Min') -> 'Min', 5

    Raises
    ------
    ValueError
        If `freqstr` does not match the frequency pattern, or its numeric
        prefix cannot be converted by ``int`` (e.g. a decimal such as '1.5').
    """
    match = opattern.match(freqstr)
    if not match:
        raise ValueError("Could not evaluate {freq}".format(freq=freqstr))

    # Group 1 is the (possibly empty) numeric prefix, group 2 the alias.
    prefix = match.group(1)
    stride = int(prefix) if len(prefix) else 1

    return (match.group(2), stride)
95+
96+
97+
# ---------------------------------------------------------------------
98+
# Period codes
99+
100+
# period frequency constants corresponding to scikits timeseries
101+
# originals
102+
_period_code_map = {
103+
# Annual freqs with various fiscal year ends.
104+
# eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005
105+
"A-DEC": 1000, # Annual - December year end
106+
"A-JAN": 1001, # Annual - January year end
107+
"A-FEB": 1002, # Annual - February year end
108+
"A-MAR": 1003, # Annual - March year end
109+
"A-APR": 1004, # Annual - April year end
110+
"A-MAY": 1005, # Annual - May year end
111+
"A-JUN": 1006, # Annual - June year end
112+
"A-JUL": 1007, # Annual - July year end
113+
"A-AUG": 1008, # Annual - August year end
114+
"A-SEP": 1009, # Annual - September year end
115+
"A-OCT": 1010, # Annual - October year end
116+
"A-NOV": 1011, # Annual - November year end
117+
118+
# Quarterly frequencies with various fiscal year ends.
119+
# eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005
120+
"Q-DEC": 2000, # Quarterly - December year end
121+
"Q-JAN": 2001, # Quarterly - January year end
122+
"Q-FEB": 2002, # Quarterly - February year end
123+
"Q-MAR": 2003, # Quarterly - March year end
124+
"Q-APR": 2004, # Quarterly - April year end
125+
"Q-MAY": 2005, # Quarterly - May year end
126+
"Q-JUN": 2006, # Quarterly - June year end
127+
"Q-JUL": 2007, # Quarterly - July year end
128+
"Q-AUG": 2008, # Quarterly - August year end
129+
"Q-SEP": 2009, # Quarterly - September year end
130+
"Q-OCT": 2010, # Quarterly - October year end
131+
"Q-NOV": 2011, # Quarterly - November year end
132+
133+
"M": 3000, # Monthly
134+
135+
"W-SUN": 4000, # Weekly - Sunday end of week
136+
"W-MON": 4001, # Weekly - Monday end of week
137+
"W-TUE": 4002, # Weekly - Tuesday end of week
138+
"W-WED": 4003, # Weekly - Wednesday end of week
139+
"W-THU": 4004, # Weekly - Thursday end of week
140+
"W-FRI": 4005, # Weekly - Friday end of week
141+
"W-SAT": 4006, # Weekly - Saturday end of week
142+
143+
"B": 5000, # Business days
144+
"D": 6000, # Daily
145+
"H": 7000, # Hourly
146+
"T": 8000, # Minutely
147+
"S": 9000, # Secondly
148+
"L": 10000, # Millisecondly
149+
"U": 11000, # Microsecondly
150+
"N": 12000, # Nanosecondly
151+
}
152+
153+
# Yearly aliases; careful not to put these in _reverse_period_code_map
154+
_period_code_map.update({'Y' + key[1:]: _period_code_map[key]
155+
for key in _period_code_map
156+
if key.startswith('A-')})
157+
158+
_period_code_map.update({
159+
"Q": 2000, # Quarterly - December year end (default quarterly)
160+
"A": 1000, # Annual
161+
"W": 4000, # Weekly
162+
"C": 5000, # Custom Business Day
163+
})
164+
165+
_dont_uppercase = set(('MS', 'ms'))
166+
167+
_lite_rule_alias = {
168+
'W': 'W-SUN',
169+
'Q': 'Q-DEC',
170+
171+
'A': 'A-DEC', # YearEnd(month=12),
172+
'Y': 'A-DEC',
173+
'AS': 'AS-JAN', # YearBegin(month=1),
174+
'YS': 'AS-JAN',
175+
'BA': 'BA-DEC', # BYearEnd(month=12),
176+
'BY': 'BA-DEC',
177+
'BAS': 'BAS-JAN', # BYearBegin(month=1),
178+
'BYS': 'BAS-JAN',
179+
180+
'Min': 'T',
181+
'min': 'T',
182+
'ms': 'L',
183+
'us': 'U',
184+
'ns': 'N'}
185+
186+
_INVALID_FREQ_ERROR = "Invalid frequency: {0}"
187+
188+
189+
cpdef _period_str_to_code(freqstr):
    """
    Translate a frequency alias into its integer period code.

    The alias is first resolved through ``_lite_rule_alias`` as given, then
    through its lower-cased form, and finally upper-cased, skipping the
    case-changing steps for anything listed in ``_dont_uppercase``, before
    being looked up in ``_period_code_map``.

    Raises
    ------
    ValueError
        If the resolved alias is not a key of ``_period_code_map``.
    """
    alias = _lite_rule_alias.get(freqstr, freqstr)

    if alias not in _dont_uppercase:
        # Second chance for case variants of the short aliases.
        alias = _lite_rule_alias.get(alias.lower(), alias)

    if alias not in _dont_uppercase:
        alias = alias.upper()

    try:
        code = _period_code_map[alias]
    except KeyError:
        raise ValueError(_INVALID_FREQ_ERROR.format(alias))
    return code

pandas/tseries/frequencies.py

+1-78
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from pandas.core.dtypes.generic import ABCSeries
1010
from pandas.core.dtypes.common import (
11-
is_integer,
1211
is_period_arraylike,
1312
is_timedelta64_dtype,
1413
is_datetime64_dtype)
@@ -21,6 +20,7 @@
2120

2221
from pandas._libs import lib, tslib
2322
from pandas._libs.tslib import Timedelta
23+
from pandas._libs.tslibs.frequencies import get_freq_code, _base_and_stride
2424
from pytz import AmbiguousTimeError
2525

2626

@@ -298,58 +298,6 @@ def get_freq(freq):
298298
return freq
299299

300300

301-
def get_freq_code(freqstr):
302-
"""
303-
Return freq str or tuple to freq code and stride (mult)
304-
305-
Parameters
306-
----------
307-
freqstr : str or tuple
308-
309-
Returns
310-
-------
311-
return : tuple of base frequency code and stride (mult)
312-
313-
Example
314-
-------
315-
>>> get_freq_code('3D')
316-
(6000, 3)
317-
318-
>>> get_freq_code('D')
319-
(6000, 1)
320-
321-
>>> get_freq_code(('D', 3))
322-
(6000, 3)
323-
"""
324-
if isinstance(freqstr, DateOffset):
325-
freqstr = (freqstr.rule_code, freqstr.n)
326-
327-
if isinstance(freqstr, tuple):
328-
if (is_integer(freqstr[0]) and
329-
is_integer(freqstr[1])):
330-
# e.g., freqstr = (2000, 1)
331-
return freqstr
332-
else:
333-
# e.g., freqstr = ('T', 5)
334-
try:
335-
code = _period_str_to_code(freqstr[0])
336-
stride = freqstr[1]
337-
except:
338-
if is_integer(freqstr[1]):
339-
raise
340-
code = _period_str_to_code(freqstr[1])
341-
stride = freqstr[0]
342-
return code, stride
343-
344-
if is_integer(freqstr):
345-
return (freqstr, 1)
346-
347-
base, stride = _base_and_stride(freqstr)
348-
code = _period_str_to_code(base)
349-
350-
return code, stride
351-
352-
353301
def _get_freq_str(base, mult=1):
354302
code = _reverse_period_code_map.get(base)
355303
if mult == 1:
@@ -577,31 +525,6 @@ def to_offset(freq):
577525
)
578526

579527

580-
def _base_and_stride(freqstr):
581-
"""
582-
Return base freq and stride info from string representation
583-
584-
Examples
585-
--------
586-
_freq_and_stride('5Min') -> 'Min', 5
587-
"""
588-
groups = opattern.match(freqstr)
589-
590-
if not groups:
591-
raise ValueError("Could not evaluate {freq}".format(freq=freqstr))
592-
593-
stride = groups.group(1)
594-
595-
if len(stride):
596-
stride = int(stride)
597-
else:
598-
stride = 1
599-
600-
base = groups.group(2)
601-
602-
return (base, stride)
603-
604-
605528
def get_base_alias(freqstr):
606529
"""
607530
Returns the base frequency alias, e.g., '5D' -> 'D'

setup.py

+4
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ class CheckSDist(sdist_class):
341341
'pandas/_libs/window.pyx',
342342
'pandas/_libs/sparse.pyx',
343343
'pandas/_libs/parsers.pyx',
344+
'pandas/_libs/tslibs/frequencies.pyx',
344345
'pandas/io/sas/sas.pyx']
345346

346347
def initialize_options(self):
@@ -492,6 +493,8 @@ def pxd(name):
492493
'sources': ['pandas/_libs/src/datetime/np_datetime.c',
493494
'pandas/_libs/src/datetime/np_datetime_strings.c',
494495
'pandas/_libs/src/period_helper.c']},
496+
'_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies',
497+
'pxdfiles': ['_libs/src/util']},
495498
'_libs.index': {'pyxfile': '_libs/index',
496499
'sources': ['pandas/_libs/src/datetime/np_datetime.c',
497500
'pandas/_libs/src/datetime/np_datetime_strings.c'],
@@ -653,6 +656,7 @@ def pxd(name):
653656
'pandas.io.formats',
654657
'pandas.io.clipboard',
655658
'pandas._libs',
659+
'pandas._libs.tslibs',
656660
'pandas.plotting',
657661
'pandas.stats',
658662
'pandas.types',

0 commit comments

Comments
 (0)