diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd new file mode 100644 index 0000000000000..a1bbeea1cb69a --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from cython cimport Py_ssize_t + +from numpy cimport int64_t, int32_t + + +cdef int dayofweek(int y, int m, int d) nogil +cdef bint is_leapyear(int64_t year) nogil +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil +cpdef int32_t get_week_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx new file mode 100644 index 0000000000000..a68ecbd2e8629 --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +# cython: boundscheck=False +""" +Cython implementations of functions resembling the stdlib calendar module +""" + +cimport cython +from cython cimport Py_ssize_t + +import numpy as np +cimport numpy as np +from numpy cimport int64_t, int32_t +np.import_array() + + +# ---------------------------------------------------------------------- +# Constants + +# Slightly more performant cython lookups than a 2D table +# The first 12 entries correspond to month lengths for non-leap years. +# The remaining 12 entries give month lengths for leap years. +cdef int32_t* days_per_month_array = [ + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +# Month offsets for Sakamoto's day-of-week method; dayofweek reads entry m - 1 +cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] + +# The first 13 entries give the month days elapsed as of the first of month N +# (or the total number of days in the year for N=13) in non-leap years. +# The remaining 13 entries give the days elapsed in leap years. 
+cdef int32_t* _month_offset = [ + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, + 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] + +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil: + """Return the number of days in the given month of the given year. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + days_in_month : int + + Notes + ----- + Assumes that the arguments are valid. Passing a month not between 1 and 12 + risks a segfault. + """ + return days_per_month_array[12 * is_leapyear(year) + month - 1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision +cdef int dayofweek(int y, int m, int d) nogil: + """Find the day of week for the date described by the Y/M/D triple y, m, d + using Sakamoto's method [2]_. + + 0 represents Monday, matching calendar.weekday [1]_. + + Parameters + ---------- + y : int + m : int + d : int + + Returns + ------- + weekday : int + + Notes + ----- + Assumes that y, m, d represent a valid date. + + References + ---------- + .. [1] https://docs.python.org/3.6/library/calendar.html#calendar.weekday + + .. [2] https://en.wikipedia.org/wiki/\ + Determination_of_the_day_of_the_week#Sakamoto.27s_methods + """ + cdef: + int day + + y -= m < 3 # January and February are counted with the previous year + day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7 + # shift so that Monday (1 before the shift) becomes 0, the Python convention + return (day + 6) % 7 + + +cdef bint is_leapyear(int64_t year) nogil: + """Returns 1 if the given year is a leap year, 0 otherwise. + + Parameters + ---------- + year : int + + Returns + ------- + is_leap : bool + """ + return ((year & 0x3) == 0 and # year % 4 == 0 + ((year % 100) != 0 or (year % 400) == 0)) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_week_of_year(int year, int month, int day) nogil: + """Return the ordinal week-of-year for the given day. 
+ + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + week_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + cdef: + bint isleap, isleap_prev + int32_t mo_off + int32_t doy, dow + int woy + + isleap = is_leapyear(year) + isleap_prev = is_leapyear(year - 1) + + mo_off = _month_offset[isleap * 13 + month - 1] + + doy = mo_off + day + dow = dayofweek(year, month, day) + + # estimate: zero-based day-of-year of this week's Thursday (Monday + 3) + woy = (doy - 1) - dow + 3 + if woy >= 0: + woy = woy / 7 + 1 + + # verify: woy < 0 falls in the prior year; week 53 may be week 1 instead + if woy < 0: + if (woy > -2) or (woy == -2 and isleap_prev): + woy = 53 + else: + woy = 52 + elif woy == 53: + if 31 - day + dow < 3: + woy = 1 + + return woy diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3de361c511fbf..b321ca1659682 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -17,9 +17,10 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() +from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek, + get_week_of_year) from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, - dt64_to_dtstruct, td64_to_tdstruct, - days_per_month_table, is_leapyear, dayofweek) + dt64_to_dtstruct, td64_to_tdstruct) from nattype cimport NPY_NAT @@ -379,7 +380,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): ndarray[int32_t, ndim=2] _month_offset int isleap, isleap_prev pandas_datetimestruct dts - int mo_off, doy, dow, woy + int mo_off, doy, dow _month_offset = np.array( [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], @@ -507,28 +508,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): continue dt64_to_dtstruct(dtindex[i], &dts) - isleap = is_leapyear(dts.year) - isleap_prev = is_leapyear(dts.year - 1) - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - # estimate - woy = (doy - 1) - dow + 3 - if woy >= 0: - woy = woy / 7 + 
1 - - # verify - if woy < 0: - if (woy > -2) or (woy == -2 and isleap_prev): - woy = 53 - else: - woy = 52 - elif woy == 53: - if 31 - dts.day + dow < 3: - woy = 1 - - out[i] = woy + out[i] = get_week_of_year(dts.year, dts.month, dts.day) return out elif field == 'q': @@ -551,7 +531,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): continue dt64_to_dtstruct(dtindex[i], &dts) - out[i] = days_in_month(dts) + out[i] = get_days_in_month(dts.year, dts.month) return out elif field == 'is_leap_year': return isleapyear_arr(get_date_field(dtindex, 'Y')) @@ -676,10 +656,6 @@ def get_timedelta_field(ndarray[int64_t] tdindex, object field): raise ValueError("Field %s not supported" % field) -cdef inline int days_in_month(pandas_datetimestruct dts) nogil: - return days_per_month_table[is_leapyear(dts.year)][dts.month - 1] - - cpdef isleapyear_arr(ndarray years): """vectorized version of isleapyear; NaT evaluates as False""" cdef: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 59044fe314e08..478611fe9cab9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -20,6 +20,7 @@ from util cimport (is_datetime64_object, is_timedelta64_object, is_integer_object, is_string_object, INT64_MAX) +cimport ccalendar from conversion import tz_localize_to_utc, date_normalize from conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) @@ -699,6 +700,9 @@ class Timestamp(_Timestamp): @property def week(self): + if self.freq is None: + # fastpath for non-business + return ccalendar.get_week_of_year(self.year, self.month, self.day) return self._get_field('woy') weekofyear = week @@ -709,7 +713,7 @@ class Timestamp(_Timestamp): @property def days_in_month(self): - return self._get_field('dim') + return ccalendar.get_days_in_month(self.year, self.month) daysinmonth = days_in_month diff --git a/setup.py b/setup.py index c58cc8ef99faf..e6480cfedaee0 100755 --- 
a/setup.py +++ b/setup.py @@ -317,6 +317,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/skiplist.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/ccalendar.pyx', 'pandas/_libs/tslibs/period.pyx', 'pandas/_libs/tslibs/strptime.pyx', 'pandas/_libs/tslibs/np_datetime.pyx', @@ -537,6 +538,8 @@ def pxd(name): '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, + '_libs.tslibs.ccalendar': { + 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': { 'pyxfile': '_libs/tslibs/conversion', 'pxdfiles': ['_libs/src/util', @@ -547,7 +550,8 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.fields': { 'pyxfile': '_libs/tslibs/fields', - 'pxdfiles': ['_libs/tslibs/nattype'], + 'pxdfiles': ['_libs/tslibs/ccalendar', + '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.frequencies': { @@ -594,6 +598,7 @@ def pxd(name): '_libs.tslibs.timestamps': { 'pyxfile': '_libs/tslibs/timestamps', 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/ccalendar', '_libs/tslibs/conversion', '_libs/tslibs/nattype', '_libs/tslibs/timedeltas',