Skip to content

Commit 3e506a3

Browse files
jbrockmendel authored and jreback committed
standalone implementation of ccalendar (pandas-dev#18540)
1 parent 279578c commit 3e506a3

File tree

5 files changed

+192
-32
lines changed

5 files changed

+192
-32
lines changed

pandas/_libs/tslibs/ccalendar.pxd

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# -*- coding: utf-8 -*-
2+
# cython: profile=False
3+
4+
from cython cimport Py_ssize_t
5+
6+
from numpy cimport int64_t, int32_t
7+
8+
9+
# Day of week for the date given as year `y`, month `m`, day `d`;
# the implementation lives in ccalendar.pyx (0 represents Monday).
cdef int dayofweek(int y, int m, int d) nogil
10+
cdef bint is_leapyear(int64_t year) nogil
11+
cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil
12+
cpdef int32_t get_week_of_year(int year, int month, int day) nogil

pandas/_libs/tslibs/ccalendar.pyx

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# -*- coding: utf-8 -*-
2+
# cython: profile=False
3+
# cython: boundscheck=False
4+
"""
5+
Cython implementations of functions resembling the stdlib calendar module
6+
"""
7+
8+
cimport cython
9+
from cython cimport Py_ssize_t
10+
11+
import numpy as np
12+
cimport numpy as np
13+
from numpy cimport int64_t, int32_t
14+
np.import_array()
15+
16+
17+
# ----------------------------------------------------------------------
18+
# Constants
19+
20+
# Slightly more performant cython lookups than a 2D table
21+
# The first 12 entries correspond to month lengths for non-leap years.
22+
# The remaining 12 entries give month lengths for leap years
23+
cdef int32_t* days_per_month_array = [
24+
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
25+
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
26+
27+
cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]
28+
29+
# The first 13 entries give the month days elapsed as of the first of month N
30+
# (or the total number of days in the year for N=13) in non-leap years.
31+
# The remaining 13 entries give the days elapsed in leap years.
32+
cdef int32_t* _month_offset = [
33+
0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365,
34+
0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]
35+
36+
# ----------------------------------------------------------------------
37+
38+
39+
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil:
    """Look up how many days the given month has in the given year.

    Parameters
    ----------
    year : int
    month : int
        Calendar month, 1 through 12.

    Returns
    -------
    days_in_month : int

    Notes
    -----
    No validation is performed.  The lookup is an unchecked read from a
    C array, so a month outside 1..12 may read out of bounds and risks a
    segfault.
    """
    cdef:
        Py_ssize_t table_start

    # The flat table stores the 12 non-leap lengths followed by the 12
    # leap-year lengths; pick the half that applies to `year`.
    table_start = 12 * is_leapyear(year)
    return days_per_month_array[table_start + month - 1]
59+
60+
61+
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision
cdef int dayofweek(int y, int m, int d) nogil:
    """Compute the weekday of the date (y, m, d) with Sakamoto's method.

    The result follows the convention of the stdlib ``calendar.weekday``:
    0 represents Monday.

    Parameters
    ----------
    y : int
        Year.
    m : int
        Month, 1 through 12.
    d : int
        Day of the month.

    Returns
    -------
    weekday : int

    Notes
    -----
    Assumes that y, m, d describe a valid date; nothing is validated.

    References
    ----------
    [1] https://docs.python.org/3.6/library/calendar.html#calendar.weekday

    [2] https://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week#Sakamoto.27s_methods
    """
    cdef:
        int leap_terms
        int sakamoto_day

    # January and February are counted as part of the preceding year.
    y -= m < 3
    leap_terms = y / 4 - y / 100 + y / 400
    sakamoto_day = (y + leap_terms + sakamoto_arr[m - 1] + d) % 7

    # Shift the intermediate result so that Monday maps to 0.
    return (sakamoto_day + 6) % 7
98+
99+
100+
cdef bint is_leapyear(int64_t year) nogil:
    """Tell whether the given year is a leap year.

    Parameters
    ----------
    year : int

    Returns
    -------
    is_leap : bool
        1 for a leap year, 0 otherwise.
    """
    # Any of the two low bits set means the year is not divisible by 4.
    if (year & 0x3) != 0:
        return False
    # Divisible by 4: century years are leap only when divisible by 400.
    return (year % 100) != 0 or (year % 400) == 0
113+
114+
115+
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
    """Compute the ordinal week-of-year of the given date.

    Parameters
    ----------
    year : int
    month : int
    day : int

    Returns
    -------
    week_of_year : int32_t

    Notes
    -----
    Assumes the inputs describe a valid date; nothing is validated.
    """
    cdef:
        bint leap_now, leap_before
        int32_t month_start
        int32_t day_of_year, weekday
        int week

    leap_now = is_leapyear(year)
    leap_before = is_leapyear(year - 1)

    # The offset table holds 13 non-leap entries followed by 13 leap entries.
    month_start = _month_offset[leap_now * 13 + month - 1]

    day_of_year = month_start + day
    weekday = dayofweek(year, month, day)

    # First estimate from the day-of-year and the weekday.
    week = (day_of_year - 1) - weekday + 3

    if week < 0:
        # Negative estimate: the week number is taken from the end of the
        # previous year, which has 53 or 52 weeks.
        if (week > -2) or (week == -2 and leap_before):
            week = 53
        else:
            week = 52
    else:
        week = week / 7 + 1
        # An estimate of 53 is reset to week 1 when the remaining-days
        # check below holds.
        if week == 53 and 31 - day + weekday < 3:
            week = 1

    return week

pandas/_libs/tslibs/fields.pyx

+6-30
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t
1717
np.import_array()
1818

1919

20+
from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek,
21+
get_week_of_year)
2022
from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct,
21-
dt64_to_dtstruct, td64_to_tdstruct,
22-
days_per_month_table, is_leapyear, dayofweek)
23+
dt64_to_dtstruct, td64_to_tdstruct)
2324
from nattype cimport NPY_NAT
2425

2526

@@ -379,7 +380,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
379380
ndarray[int32_t, ndim=2] _month_offset
380381
int isleap, isleap_prev
381382
pandas_datetimestruct dts
382-
int mo_off, doy, dow, woy
383+
int mo_off, doy, dow
383384

384385
_month_offset = np.array(
385386
[[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ],
@@ -507,28 +508,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
507508
continue
508509

509510
dt64_to_dtstruct(dtindex[i], &dts)
510-
isleap = is_leapyear(dts.year)
511-
isleap_prev = is_leapyear(dts.year - 1)
512-
mo_off = _month_offset[isleap, dts.month - 1]
513-
doy = mo_off + dts.day
514-
dow = dayofweek(dts.year, dts.month, dts.day)
515-
516-
# estimate
517-
woy = (doy - 1) - dow + 3
518-
if woy >= 0:
519-
woy = woy / 7 + 1
520-
521-
# verify
522-
if woy < 0:
523-
if (woy > -2) or (woy == -2 and isleap_prev):
524-
woy = 53
525-
else:
526-
woy = 52
527-
elif woy == 53:
528-
if 31 - dts.day + dow < 3:
529-
woy = 1
530-
531-
out[i] = woy
511+
out[i] = get_week_of_year(dts.year, dts.month, dts.day)
532512
return out
533513

534514
elif field == 'q':
@@ -551,7 +531,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
551531
continue
552532

553533
dt64_to_dtstruct(dtindex[i], &dts)
554-
out[i] = days_in_month(dts)
534+
out[i] = get_days_in_month(dts.year, dts.month)
555535
return out
556536
elif field == 'is_leap_year':
557537
return isleapyear_arr(get_date_field(dtindex, 'Y'))
@@ -676,10 +656,6 @@ def get_timedelta_field(ndarray[int64_t] tdindex, object field):
676656
raise ValueError("Field %s not supported" % field)
677657

678658

679-
cdef inline int days_in_month(pandas_datetimestruct dts) nogil:
680-
return days_per_month_table[is_leapyear(dts.year)][dts.month - 1]
681-
682-
683659
cpdef isleapyear_arr(ndarray years):
684660
"""vectorized version of isleapyear; NaT evaluates as False"""
685661
cdef:

pandas/_libs/tslibs/timestamps.pyx

+5-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ from util cimport (is_datetime64_object, is_timedelta64_object,
2020
is_integer_object, is_string_object,
2121
INT64_MAX)
2222

23+
cimport ccalendar
2324
from conversion import tz_localize_to_utc, date_normalize
2425
from conversion cimport (tz_convert_single, _TSObject,
2526
convert_to_tsobject, convert_datetime_to_tsobject)
@@ -699,6 +700,9 @@ class Timestamp(_Timestamp):
699700

700701
@property
701702
def week(self):
703+
if self.freq is None:
704+
# fastpath for non-business
705+
return ccalendar.get_week_of_year(self.year, self.month, self.day)
702706
return self._get_field('woy')
703707

704708
weekofyear = week
@@ -709,7 +713,7 @@ class Timestamp(_Timestamp):
709713

710714
@property
711715
def days_in_month(self):
712-
return self._get_field('dim')
716+
return ccalendar.get_days_in_month(self.year, self.month)
713717

714718
daysinmonth = days_in_month
715719

setup.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ class CheckSDist(sdist_class):
317317
'pandas/_libs/skiplist.pyx',
318318
'pandas/_libs/sparse.pyx',
319319
'pandas/_libs/parsers.pyx',
320+
'pandas/_libs/tslibs/ccalendar.pyx',
320321
'pandas/_libs/tslibs/period.pyx',
321322
'pandas/_libs/tslibs/strptime.pyx',
322323
'pandas/_libs/tslibs/np_datetime.pyx',
@@ -537,6 +538,8 @@ def pxd(name):
537538
'_libs/tslibs/nattype'],
538539
'depends': tseries_depends,
539540
'sources': np_datetime_sources},
541+
'_libs.tslibs.ccalendar': {
542+
'pyxfile': '_libs/tslibs/ccalendar'},
540543
'_libs.tslibs.conversion': {
541544
'pyxfile': '_libs/tslibs/conversion',
542545
'pxdfiles': ['_libs/src/util',
@@ -547,7 +550,8 @@ def pxd(name):
547550
'sources': np_datetime_sources},
548551
'_libs.tslibs.fields': {
549552
'pyxfile': '_libs/tslibs/fields',
550-
'pxdfiles': ['_libs/tslibs/nattype'],
553+
'pxdfiles': ['_libs/tslibs/ccalendar',
554+
'_libs/tslibs/nattype'],
551555
'depends': tseries_depends,
552556
'sources': np_datetime_sources},
553557
'_libs.tslibs.frequencies': {
@@ -594,6 +598,7 @@ def pxd(name):
594598
'_libs.tslibs.timestamps': {
595599
'pyxfile': '_libs/tslibs/timestamps',
596600
'pxdfiles': ['_libs/src/util',
601+
'_libs/tslibs/ccalendar',
597602
'_libs/tslibs/conversion',
598603
'_libs/tslibs/nattype',
599604
'_libs/tslibs/timedeltas',

0 commit comments

Comments
 (0)