Skip to content

Commit 58a3a90

Browse files
jbrockmendelpeterpanmj
authored andcommitted
Start porting offsets to cython (pandas-dev#17830)
1 parent b3e8f69 commit 58a3a90

File tree

6 files changed

+224
-176
lines changed

6 files changed

+224
-176
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Other API Changes
4242

4343
- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
4444
- :class:`Timestamp` will no longer silently ignore unused or invalid `tz` or `tzinfo` arguments (:issue:`17690`)
45-
-
45+
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the `tseries.offsets` module (:issue:`17830`)
4646
-
4747

4848
.. _whatsnew_0220.deprecations:

pandas/_libs/tslibs/offsets.pyx

+208
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
# -*- coding: utf-8 -*-
2+
# cython: profile=False
3+
4+
cimport cython
5+
6+
import time
7+
from cpython.datetime cimport time as dt_time
8+
9+
import numpy as np
10+
cimport numpy as np
11+
np.import_array()
12+
13+
14+
from util cimport is_string_object
15+
16+
17+
from pandas._libs.tslib import pydt_to_i8, tz_convert_single
18+
19+
# ---------------------------------------------------------------------
20+
# Constants
21+
22+
# Duplicated in tslib
23+
_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
           'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
# 1-based month number -> three-letter month abbreviation
_int_to_month = dict(enumerate(_MONTHS, 1))
# Inverse lookup: three-letter month abbreviation -> 1-based month number
_month_to_int = {abbrev: num for num, abbrev in _int_to_month.items()}
27+
28+
29+
class WeekDay(object):
    """Integer codes for the days of the week, from MON (0) to SUN (6)."""
    MON, TUE, WED, THU, FRI, SAT, SUN = range(7)
37+
38+
39+
# Weekday code (as defined on WeekDay) -> three-letter abbreviation
_int_to_weekday = {
    getattr(WeekDay, _name): _name
    for _name in ('MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN')}

# Inverse lookup: three-letter abbreviation -> weekday code
_weekday_to_int = {_name: _code for _code, _name in _int_to_weekday.items()}
49+
50+
51+
# Lookup table from an offset alias to a period alias. The literal below
# holds the bare (unanchored) aliases; anchored variants are added by the
# loops that follow.
_offset_to_period_map = {
    'WEEKDAY': 'D', 'EOM': 'M', 'BM': 'M',
    'BQS': 'Q', 'QS': 'Q', 'BQ': 'Q',
    'BA': 'A', 'AS': 'A', 'BAS': 'A',
    'MS': 'M',
    'D': 'D', 'C': 'C', 'B': 'B', 'T': 'T', 'S': 'S',
    'L': 'L', 'U': 'U', 'N': 'N', 'H': 'H',
    'Q': 'Q', 'A': 'A', 'W': 'W', 'M': 'M',
    'Y': 'A', 'BY': 'A', 'YS': 'A', 'BYS': 'A',
}

need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS']

# A month-anchored variant of one of these prefixes (e.g. 'BQ-MAR') maps to
# the same period alias as the bare prefix.
for __prefix in need_suffix:
    for _m in _MONTHS:
        key = __prefix + '-' + _m
        _offset_to_period_map[key] = _offset_to_period_map[__prefix]

# Month-anchored 'A-<MONTH>' and 'Q-<MONTH>' aliases map to themselves.
for __prefix in ['A', 'Q']:
    for _m in _MONTHS:
        _alias = __prefix + '-' + _m
        _offset_to_period_map[_alias] = _alias

# Day-anchored weekly aliases ('W-MON' ... 'W-SUN') map to themselves.
_days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
for _d in _days:
    _offset_to_period_map['W-' + _d] = 'W-' + _d
96+
97+
98+
# ---------------------------------------------------------------------
99+
# Misc Helpers
100+
101+
def as_datetime(obj):
    """
    Return ``obj.to_pydatetime()`` when ``obj`` has that method.

    Objects without a ``to_pydatetime`` attribute (or whose attribute is
    None) are returned unchanged.
    """
    converter = getattr(obj, 'to_pydatetime', None)
    return obj if converter is None else converter()
106+
107+
108+
def _is_normalized(dt):
    """
    Return True when every time-of-day field of ``dt`` is zero.

    Checks hour, minute, second and microsecond, plus ``nanosecond`` when
    ``dt`` has that attribute (a missing attribute is treated as 0).
    """
    return (dt.hour == 0 and dt.minute == 0 and dt.second == 0 and
            dt.microsecond == 0 and getattr(dt, 'nanosecond', 0) == 0)
113+
114+
115+
# ---------------------------------------------------------------------
116+
# Business Helpers
117+
118+
def _get_firstbday(wkday):
    """
    Return the day-of-month of the first business day of a month.

    Parameters
    ----------
    wkday : int
        Weekday of the first day of the month, as given by
        ``monthrange(year, month)`` (5 is Saturday, 6 is Sunday).

    Returns
    -------
    int
        3 if the month starts on a Saturday, 2 if it starts on a Sunday,
        otherwise 1.
    """
    if wkday == 5:  # month starts on Saturday -> first business day is the 3rd
        return 3
    if wkday == 6:  # month starts on Sunday -> first business day is the 2nd
        return 2
    return 1
130+
131+
132+
def _get_calendar(weekmask, holidays, calendar):
    """
    Generate a busdaycalendar and the matching tuple of holidays.

    Parameters
    ----------
    weekmask : str or sequence
        Passed through as ``weekmask`` to ``np.busdaycalendar`` when a new
        calendar has to be built.
    holidays : sequence or None
        Extra holidays. May be a list, tuple or other iterable.
    calendar : np.busdaycalendar, object with a ``holidays()`` method, or None
        If it is already an ``np.busdaycalendar`` it is returned as-is.
        Otherwise, if it exposes ``holidays()``, the result of
        ``calendar.holidays().tolist()`` is appended to ``holidays``.

    Returns
    -------
    (np.busdaycalendar, tuple)
        The calendar and the holidays associated with it.
    """
    if isinstance(calendar, np.busdaycalendar):
        if not holidays:
            holidays = tuple(calendar.holidays)
        elif not isinstance(holidays, tuple):
            holidays = tuple(holidays)
        else:
            # trust that calendar.holidays and holidays are
            # consistent
            pass
        return calendar, holidays

    if holidays is None:
        holidays = []
    try:
        calendar_holidays = calendar.holidays().tolist()
    except AttributeError:
        # calendar is None or does not provide holidays().tolist();
        # use the explicitly passed holidays only
        pass
    else:
        # Coerce to list first: ``holidays`` may be a tuple (or another
        # non-list sequence), for which ``holidays + list`` would raise.
        holidays = list(holidays) + calendar_holidays
    holidays = [_to_dt64(dt, dtype='datetime64[D]') for dt in holidays]
    holidays = tuple(sorted(holidays))

    kwargs = {'weekmask': weekmask}
    # only pass holidays when there are any
    if holidays:
        kwargs['holidays'] = holidays

    busdaycalendar = np.busdaycalendar(**kwargs)
    return busdaycalendar, holidays
160+
161+
162+
def _to_dt64(dt, dtype='datetime64'):
    """
    Convert ``dt`` to a ``np.datetime64`` scalar with dtype name ``dtype``.

    Parameters
    ----------
    dt : datetime-like
        Value to convert. If it has a non-None ``tzinfo`` it goes through
        ``pydt_to_i8`` and ``tz_convert_single`` before being viewed as
        ``datetime64[ns]``.
        NOTE(review): presumably this yields the local wall time of ``dt``;
        confirm against the tslib helpers.
    dtype : str, default 'datetime64'
        Desired dtype name of the result.

    Returns
    -------
    np.datetime64
    """
    # Passing dtype= to np.datetime64 was observed not to give the requested
    # unit, e.g.
    # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]')
    # numpy.datetime64('2013-05-01T02:00:00.000000+0200')
    # so an explicit astype is used to cast to the requested dtype.
    tz = getattr(dt, 'tzinfo', None)
    if tz is None:
        result = np.datetime64(dt)
    else:
        stamp = pydt_to_i8(dt)
        shifted = tz_convert_single(stamp, 'UTC', tz)
        result = np.int64(shifted).astype('datetime64[ns]')

    if result.dtype.name == dtype:
        return result
    return result.astype(dtype)
176+
177+
178+
# ---------------------------------------------------------------------
179+
# Validation
180+
181+
182+
def _validate_business_time(t_input):
    """
    Validate a business-hour boundary and return it as a ``datetime.time``.

    Parameters
    ----------
    t_input : str or datetime.time
        Either a string in ``'%H:%M'`` format or a ``datetime.time`` whose
        second and microsecond are both zero.

    Returns
    -------
    datetime.time

    Raises
    ------
    ValueError
        If a string does not match ``'%H:%M'``, if a ``datetime.time`` has a
        non-zero second or microsecond, or if ``t_input`` is of any other
        type.
    """
    if is_string_object(t_input):
        try:
            parsed = time.strptime(t_input, '%H:%M')
        except ValueError:
            raise ValueError("time data must match '%H:%M' format")
        return dt_time(hour=parsed.tm_hour, minute=parsed.tm_min)

    if not isinstance(t_input, dt_time):
        raise ValueError("time data must be string or datetime.time")

    # only hour/minute resolution is accepted
    if t_input.second or t_input.microsecond:
        raise ValueError(
            "time data must be specified only with hour and minute")
    return t_input
196+
197+
# ---------------------------------------------------------------------
198+
# Mixins & Singletons
199+
200+
201+
class ApplyTypeError(TypeError):
    """
    Sentinel exception for catching the apply error, so that
    NotImplemented can be returned instead.
    """
204+
205+
206+
# TODO: unused. remove?
207+
class CacheableOffset(object):
    """Marker class whose only content is the ``_cacheable = True`` flag."""
    _cacheable = True

pandas/tests/tseries/test_offsets.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,18 @@
1717
get_offset, get_standard_freq)
1818
from pandas.core.indexes.datetimes import (
1919
_to_m8, DatetimeIndex, _daterange_cache)
20+
from pandas._libs.tslibs.offsets import WeekDay, CacheableOffset
2021
from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd,
2122
BusinessHour, WeekOfMonth, CBMonthEnd,
22-
CustomBusinessHour, WeekDay,
23+
CustomBusinessHour,
2324
CBMonthBegin, BYearEnd, MonthEnd,
2425
MonthBegin, SemiMonthBegin, SemiMonthEnd,
2526
BYearBegin, QuarterBegin, BQuarterBegin,
2627
BMonthBegin, DateOffset, Week, YearBegin,
2728
YearEnd, Hour, Minute, Second, Day, Micro,
2829
QuarterEnd, BusinessMonthEnd, FY5253,
2930
Milli, Nano, Easter, FY5253Quarter,
30-
LastWeekOfMonth, CacheableOffset)
31+
LastWeekOfMonth)
3132
from pandas.core.tools.datetimes import (
3233
format, ole2datetime, parse_time_string,
3334
to_datetime, DateParseError)

pandas/tseries/frequencies.py

+1-46
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ def _get_freq_str(base, mult=1):
312312
# ---------------------------------------------------------------------
313313
# Offset names ("time rules") and related functions
314314

315-
315+
from pandas._libs.tslibs.offsets import _offset_to_period_map
316316
from pandas.tseries.offsets import (Nano, Micro, Milli, Second, # noqa
317317
Minute, Hour,
318318
Day, BDay, CDay, Week, MonthBegin,
@@ -328,51 +328,6 @@ def _get_freq_str(base, mult=1):
328328
#: cache of previously seen offsets
329329
_offset_map = {}
330330

331-
_offset_to_period_map = {
332-
'WEEKDAY': 'D',
333-
'EOM': 'M',
334-
'BM': 'M',
335-
'BQS': 'Q',
336-
'QS': 'Q',
337-
'BQ': 'Q',
338-
'BA': 'A',
339-
'AS': 'A',
340-
'BAS': 'A',
341-
'MS': 'M',
342-
'D': 'D',
343-
'C': 'C',
344-
'B': 'B',
345-
'T': 'T',
346-
'S': 'S',
347-
'L': 'L',
348-
'U': 'U',
349-
'N': 'N',
350-
'H': 'H',
351-
'Q': 'Q',
352-
'A': 'A',
353-
'W': 'W',
354-
'M': 'M',
355-
'Y': 'A',
356-
'BY': 'A',
357-
'YS': 'A',
358-
'BYS': 'A',
359-
}
360-
361-
need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS']
362-
for __prefix in need_suffix:
363-
for _m in tslib._MONTHS:
364-
_alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m)
365-
_offset_to_period_map[_alias] = _offset_to_period_map[__prefix]
366-
for __prefix in ['A', 'Q']:
367-
for _m in tslib._MONTHS:
368-
_alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m)
369-
_offset_to_period_map[_alias] = _alias
370-
371-
_days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
372-
for _d in _days:
373-
_alias = 'W-{day}'.format(day=_d)
374-
_offset_to_period_map[_alias] = _alias
375-
376331

377332
def get_period_alias(offset_str):
378333
""" alias to closest period strings BQ->Q etc"""

0 commit comments

Comments
 (0)