From 4e942f53c1ba2e1e7ab54ab732c3e7493a717893 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 15:13:57 -0800 Subject: [PATCH 01/21] implement ccalendar. only use it incrementally for profiling --- pandas/_libs/tslibs/ccalendar.pxd | 13 +++ pandas/_libs/tslibs/ccalendar.pyx | 129 ++++++++++++++++++++++++++++++ pandas/_libs/tslibs/fields.pyx | 3 +- setup.py | 6 +- 4 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 pandas/_libs/tslibs/ccalendar.pxd create mode 100644 pandas/_libs/tslibs/ccalendar.pyx diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd new file mode 100644 index 0000000000000..38d26f3f42cee --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from cython cimport Py_ssize_t + +from numpy cimport int64_t, int32_t + + +cpdef monthrange(int64_t year, Py_ssize_t month) + +cdef int dayofweek(int year, int month, int day) nogil +cdef int is_leapyear(int64_t year) nogil +cdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx new file mode 100644 index 0000000000000..5730af16c3392 --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +# cython: boundscheck=False +""" +Cython implementations of functions resembling the stdlib calendar module +""" + +cimport cython +from cython cimport Py_ssize_t + +import numpy as np +cimport numpy as np +from numpy cimport int64_t, int32_t +np.import_array() + + +# ---------------------------------------------------------------------- +# Constants + +# Slightly more performant cython lookups than a 2D table +# The first 12 entries correspond to month lengths for non-leap years. +# The remaining 12 entries give month lengths for leap years +cdef int32_t* days_per_month_array = [ + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil: + """Return the number of days in the given month of the given year. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + days_in_month : int + + Notes + ----- + Assumes that the arguments are valid. Passing a month not between 1 and 12 + risks a segfault. + """ + return days_per_month_array[12 * is_leapyear(year) + month - 1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef monthrange(int64_t year, Py_ssize_t month): + """ + Return tuple containing the weekday of the first day of the month and + the number of days in the month. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + weekday : int + days_in_month : int + + Raises + ------ + ValueError if month is invalid + """ + cdef: + int32_t days + + if month < 1 or month > 12: + raise ValueError("bad month number 0; must be 1-12") + + days = get_days_in_month(year, month) + return (dayofweek(year, month, 1), days) + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision +cdef int dayofweek(int year, int month, int day) nogil: + """Find the day of week for the date described by the Y/M/D triple y, m, d + using Sakamoto's method, from wikipedia. + + https://en.wikipedia.org/wiki/\ + Determination_of_the_day_of_the_week#Sakamoto.27s_methods + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + weekday : int + + Notes + ----- + Assumes that y, m, d, represents a valid date. + """ + cdef: + int day + int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] + + y -= m < 3 + day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7 + # convert to python day + return (day + 6) % 7 + + +cdef bint is_leapyear(int64_t year) nogil: + """Returns 1 if the given year is a leap year, 0 otherwise. + + Parameters + ---------- + year : int + + Returns + ------- + is_leap : bool + """ + return ((year & 0x3) == 0 and # year % 4 == 0 + ((year % 100) != 0 or (year % 400) == 0)) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3de361c511fbf..65fdc274d90d8 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -17,9 +17,10 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() +from ccalendar cimport dayofweek from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct, - days_per_month_table, is_leapyear, dayofweek) + days_per_month_table, is_leapyear) from nattype cimport NPY_NAT diff --git a/setup.py b/setup.py index 7e56298d1b20b..e0db1917717ce 100755 --- a/setup.py +++ b/setup.py @@ -343,6 +343,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/ccalendar.pyx', 'pandas/_libs/tslibs/strptime.pyx', 'pandas/_libs/tslibs/np_datetime.pyx', 'pandas/_libs/tslibs/timedeltas.pyx', @@ -558,6 +559,8 @@ def pxd(name): '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, + '_libs.tslibs.ccalendar': { + 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': { 'pyxfile': '_libs/tslibs/conversion', 'pxdfiles': ['_libs/src/util', @@ -568,7 +571,8 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.tslibs.fields': { 'pyxfile': '_libs/tslibs/fields', - 'pxdfiles': ['_libs/tslibs/nattype'], + 'pxdfiles': ['_libs/tslibs/ccalendar', + '_libs/tslibs/nattype'], 'depends': tseries_depends, 'sources': np_datetime_sources}, '_libs.tslibs.frequencies': { From 6cbb4be567951278c9aea4f6a14d9ad97e7800c1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 15:27:56 -0800 Subject: [PATCH 02/21] Fix nameerror --- pandas/_libs/tslibs/ccalendar.pxd | 2 +- pandas/_libs/tslibs/ccalendar.pyx | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 38d26f3f42cee..648f9674e7a9f 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -8,6 +8,6 @@ from numpy cimport int64_t, int32_t cpdef monthrange(int64_t year, Py_ssize_t month) -cdef int dayofweek(int year, int month, int day) nogil +cdef int dayofweek(int y, int m, int m) nogil cdef int is_leapyear(int64_t year) nogil cdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 5730af16c3392..5dadc89c45605 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -83,7 +83,7 @@ cpdef monthrange(int64_t year, Py_ssize_t month): @cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision -cdef int dayofweek(int year, int month, int day) nogil: +cdef int dayofweek(int y, int m, int d) nogil: """Find the day of week for the date described by the Y/M/D triple y, m, d using Sakamoto's method, from wikipedia. @@ -92,9 +92,9 @@ cdef int dayofweek(int year, int month, int day) nogil: Parameters ---------- - year : int - month : int - day : int + y : int + m : int + d : int Returns ------- From 7f22c6ddf7b91cf785c8b2c2e8a549d4cf0df260 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 15:32:21 -0800 Subject: [PATCH 03/21] fixup signature --- pandas/_libs/tslibs/ccalendar.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 648f9674e7a9f..913101407b224 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -9,5 +9,5 @@ from numpy cimport int64_t, int32_t cpdef monthrange(int64_t year, Py_ssize_t month) cdef int dayofweek(int y, int m, int m) nogil -cdef int is_leapyear(int64_t year) nogil +cdef bint is_leapyear(int64_t year) nogil cdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil From 94557a785f09f2e27ebd067d74f2547b14eccd83 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 15:47:09 -0800 Subject: [PATCH 04/21] next run with just is_leapyear changed --- pandas/_libs/tslibs/fields.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 65fdc274d90d8..9bf26a18a6d13 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -17,10 +17,11 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() -from ccalendar cimport dayofweek +from ccalendar cimport is_leapyear +# from ccalendar cimport dayofweek from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct, - days_per_month_table, is_leapyear) + days_per_month_table, dayofweek) from nattype cimport NPY_NAT From ce8e3d5511ecde237345229369c4a40d9a189129 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 16:02:28 -0800 Subject: [PATCH 05/21] then try with just get_days_in_month --- pandas/_libs/tslibs/fields.pyx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 9bf26a18a6d13..467568728bd0e 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -17,11 +17,12 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() -from ccalendar cimport is_leapyear +from ccalendar cimport get_days_in_month +# from ccalendar cimport is_leapyear # from ccalendar cimport dayofweek from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct, - days_per_month_table, dayofweek) + days_per_month_table, dayofweek, is_leapyearv) from nattype cimport NPY_NAT @@ -553,7 +554,8 @@ def get_date_field(ndarray[int64_t] dtindex, object field): continue dt64_to_dtstruct(dtindex[i], &dts) - out[i] = days_in_month(dts) + out[i] = get_days_in_month(dts.year, dts.day) + # out[i] = days_in_month(dts) return out elif field == 'is_leap_year': return isleapyear_arr(get_date_field(dtindex, 'Y')) From 344252e610597377a5897c4d95cb2cd72805fb28 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 16:03:17 -0800 Subject: [PATCH 06/21] fixup typo --- pandas/_libs/tslibs/fields.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 467568728bd0e..592f4c4803bb7 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -22,7 +22,7 @@ from ccalendar cimport get_days_in_month # from ccalendar cimport dayofweek from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct, - days_per_month_table, dayofweek, is_leapyearv) + days_per_month_table, dayofweek, is_leapyear) from nattype cimport NPY_NAT From a084fb39948f3acc56cd8f3712702a149a7cb0d4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 17:09:32 -0800 Subject: [PATCH 07/21] use all of dayofweek, get_days_in_monht, is_leapyear in fields --- pandas/_libs/tslibs/fields.pyx | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 592f4c4803bb7..4802b82950197 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -17,12 +17,9 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() -from ccalendar cimport get_days_in_month -# from ccalendar cimport is_leapyear -# from ccalendar cimport dayofweek +from ccalendar cimport get_days_in_month, is_leapyear, dayofweek from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, - dt64_to_dtstruct, td64_to_tdstruct, - days_per_month_table, dayofweek, is_leapyear) + dt64_to_dtstruct, td64_to_tdstruct) from nattype cimport NPY_NAT @@ -555,7 +552,6 @@ def get_date_field(ndarray[int64_t] dtindex, object field): dt64_to_dtstruct(dtindex[i], &dts) out[i] = get_days_in_month(dts.year, dts.day) - # out[i] = days_in_month(dts) return out elif field == 'is_leap_year': return isleapyear_arr(get_date_field(dtindex, 'Y')) @@ -680,10 +676,6 @@ def get_timedelta_field(ndarray[int64_t] tdindex, object field): raise ValueError("Field %s not supported" % field) -cdef inline int days_in_month(pandas_datetimestruct dts) nogil: - return days_per_month_table[is_leapyear(dts.year)][dts.month - 1] - - cpdef isleapyear_arr(ndarray years): """vectorized version of isleapyear; NaT evaluates as False""" cdef: From 1b746687cf29525a8ff6da87046bd49a7228eb9e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 26 Nov 2017 17:18:18 -0800 Subject: [PATCH 08/21] try to avoid re-initializing sakamoto_arr --- pandas/_libs/tslibs/ccalendar.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 5dadc89c45605..dce023b547aa2 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -24,6 +24,8 @@ cdef int32_t* days_per_month_array = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] +cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] + # ---------------------------------------------------------------------- @@ -106,7 +108,6 @@ cdef int dayofweek(int y, int m, int d) nogil: """ cdef: int day - int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] y -= m < 3 day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7 From fdf280ec456243dfa6b02ac18ec85e27bbe16e9d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 19:44:40 -0800 Subject: [PATCH 09/21] implement get_week_of_year --- pandas/_libs/tslibs/ccalendar.pxd | 1 + pandas/_libs/tslibs/ccalendar.pyx | 55 +++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 913101407b224..76df84242bcb2 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -11,3 +11,4 @@ cpdef monthrange(int64_t year, Py_ssize_t month) cdef int dayofweek(int y, int m, int m) nogil cdef bint is_leapyear(int64_t year) nogil cdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil +cpdef int32_t get_week_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index dce023b547aa2..6f2d5610ef0c1 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -26,6 +26,11 @@ cdef int32_t* days_per_month_array = [ cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] + +cdef int32_t* _month_offset = [ + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, + 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] + # ---------------------------------------------------------------------- @@ -128,3 +133,53 @@ cdef bint is_leapyear(int64_t year) nogil: """ return ((year & 0x3) == 0 and # year % 4 == 0 ((year % 100) != 0 or (year % 400) == 0)) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_week_of_year(int year, int month, int day) nogil: + """Return the ordinal week-of-year for the given day + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + week_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + cdef: + bint isleap, isleap_prev + int32_t mo_off, woy + int doy, dow + + isleap = is_leapyear(year) + isleap_prev = is_leapyear(year - 1) + + mo_off = _month_offset[isleap * 12 + month - 1] + + doy = mo_off + day + dow = dayofweek(year, month, day) + + # estimate + woy = (doy - 1) - dow + 3 + if woy >= 0: + woy = woy / 7 + 1 + + # verify + if woy < 0: + if (woy > -2) or (woy == -2 and isleap_prev): + woy = 53 + else: + woy = 52 + elif woy == 53: + if 31 - day + dow < 3: + woy = 1 + + return woy From f8cc94534ce76bb87b3ef4888ed706f30101240e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 19:45:08 -0800 Subject: [PATCH 10/21] punctuation --- pandas/_libs/tslibs/ccalendar.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 6f2d5610ef0c1..f6af4c4900543 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -138,7 +138,7 @@ cdef bint is_leapyear(int64_t year) nogil: @cython.wraparound(False) @cython.boundscheck(False) cpdef int32_t get_week_of_year(int year, int month, int day) nogil: - """Return the ordinal week-of-year for the given day + """Return the ordinal week-of-year for the given day. Parameters ---------- From a29950b36b19764a10957223381e36a5b5faa23a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 19:47:44 -0800 Subject: [PATCH 11/21] use get_week_of_year --- pandas/_libs/tslibs/fields.pyx | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 4802b82950197..3e66cf3189c25 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -17,7 +17,8 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() -from ccalendar cimport get_days_in_month, is_leapyear, dayofweek +from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek, + get_week_of_year) from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct) from nattype cimport NPY_NAT @@ -507,28 +508,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): continue dt64_to_dtstruct(dtindex[i], &dts) - isleap = is_leapyear(dts.year) - isleap_prev = is_leapyear(dts.year - 1) - mo_off = _month_offset[isleap, dts.month - 1] - doy = mo_off + dts.day - dow = dayofweek(dts.year, dts.month, dts.day) - - # estimate - woy = (doy - 1) - dow + 3 - if woy >= 0: - woy = woy / 7 + 1 - - # verify - if woy < 0: - if (woy > -2) or (woy == -2 and isleap_prev): - woy = 53 - else: - woy = 52 - elif woy == 53: - if 31 - dts.day + dow < 3: - woy = 1 - - out[i] = woy + out[i] = get_week_of_year(dts.year, dts.month, dts.day) return out elif field == 'q': From 3d07a2ab5b87b4daea01762e30ec8899f77ce384 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 19:48:32 -0800 Subject: [PATCH 12/21] whitesapce fixup --- pandas/_libs/tslibs/ccalendar.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index f6af4c4900543..5a580c11cc0ca 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -26,7 +26,6 @@ cdef int32_t* days_per_month_array = [ cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] - cdef int32_t* _month_offset = [ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] From b16f443bb1f8e04c1bf9c29aacd1cb33d90c510e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 19:48:40 -0800 Subject: [PATCH 13/21] whitesapce fixup --- pandas/_libs/tslibs/ccalendar.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 5a580c11cc0ca..5c8fb2ff19327 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -21,7 +21,7 @@ np.import_array() # The first 12 entries correspond to month lengths for non-leap years. # The remaining 12 entries give month lengths for leap years cdef int32_t* days_per_month_array = [ - 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] From 9ec475a82873744d5448b0f8ae48fc626fc6ba44 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 19:50:09 -0800 Subject: [PATCH 14/21] comment --- pandas/_libs/tslibs/ccalendar.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 5c8fb2ff19327..3c7bfbaadbe4b 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -26,6 +26,9 @@ cdef int32_t* days_per_month_array = [ cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] +# The first 12 entries give the month days elapsed as of the first of month N +# in non-leap years. The remaining 12 entries give the days elapsed in leap +# years. cdef int32_t* _month_offset = [ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] From b22171082fd14b303de9f854580a599a644e064b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 20:55:20 -0800 Subject: [PATCH 15/21] fixup typo --- pandas/_libs/tslibs/fields.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 3e66cf3189c25..c022a6325c151 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -531,7 +531,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): continue dt64_to_dtstruct(dtindex[i], &dts) - out[i] = get_days_in_month(dts.year, dts.day) + out[i] = get_days_in_month(dts.year, dts.month) return out elif field == 'is_leap_year': return isleapyear_arr(get_date_field(dtindex, 'Y')) From d91ec05a352c852062152a72b5f6960cd96ad7f8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 27 Nov 2017 21:19:44 -0800 Subject: [PATCH 16/21] fix 12-->13 --- pandas/_libs/tslibs/ccalendar.pxd | 2 +- pandas/_libs/tslibs/ccalendar.pyx | 9 +++++---- pandas/_libs/tslibs/fields.pyx | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 76df84242bcb2..58fb08495de5f 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -10,5 +10,5 @@ cpdef monthrange(int64_t year, Py_ssize_t month) cdef int dayofweek(int y, int m, int m) nogil cdef bint is_leapyear(int64_t year) nogil -cdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil cpdef int32_t get_week_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 3c7bfbaadbe4b..5fa2c2015b0b7 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -38,7 +38,7 @@ cdef int32_t* _month_offset = [ @cython.wraparound(False) @cython.boundscheck(False) -cdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil: +cpdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil: """Return the number of days in the given month of the given year. Parameters @@ -158,13 +158,14 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil: """ cdef: bint isleap, isleap_prev - int32_t mo_off, woy - int doy, dow + int32_t mo_off + int32_t doy, dow + int woy isleap = is_leapyear(year) isleap_prev = is_leapyear(year - 1) - mo_off = _month_offset[isleap * 12 + month - 1] + mo_off = _month_offset[isleap * 13 + month - 1] doy = mo_off + day dow = dayofweek(year, month, day) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index c022a6325c151..b321ca1659682 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -380,7 +380,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): ndarray[int32_t, ndim=2] _month_offset int isleap, isleap_prev pandas_datetimestruct dts - int mo_off, doy, dow, woy + int mo_off, doy, dow _month_offset = np.array( [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], From 8f789924b58c4722b5548a016149fd2b9658e80d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 28 Nov 2017 07:03:17 -0800 Subject: [PATCH 17/21] flesh out docstring --- pandas/_libs/tslibs/ccalendar.pyx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 5fa2c2015b0b7..4110c664f3aa9 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -96,8 +96,7 @@ cdef int dayofweek(int y, int m, int d) nogil: """Find the day of week for the date described by the Y/M/D triple y, m, d using Sakamoto's method, from wikipedia. - https://en.wikipedia.org/wiki/\ - Determination_of_the_day_of_the_week#Sakamoto.27s_methods + 0 represents Monday. See [1]_. Parameters ---------- @@ -112,6 +111,13 @@ cdef int dayofweek(int y, int m, int d) nogil: Notes ----- Assumes that y, m, d, represents a valid date. + + See Also + -------- + [1] https://docs.python.org/3.6/library/calendar.html#calendar.weekday + + [2] https://en.wikipedia.org/wiki/\ + Determination_of_the_day_of_the_week#Sakamoto.27s_methods """ cdef: int day From 32be4f42f3a08f5c129f83cf810b55d8d5661918 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 28 Nov 2017 07:50:00 -0800 Subject: [PATCH 18/21] implement ccalendar fastpaths for timestamp properties --- pandas/_libs/tslibs/timestamps.pyx | 10 +++++++++- setup.py | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8fdded0bcb07a..65a70ee1dce3d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -20,6 +20,7 @@ from util cimport (is_datetime64_object, is_timedelta64_object, is_integer_object, is_string_object, INT64_MAX) +cimport ccalendar from conversion import tz_localize_to_utc, date_normalize from conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) @@ -707,6 +708,9 @@ class Timestamp(_Timestamp): @property def week(self): + if self.freq is None: + # fastpath for non-business + return ccalendar.get_week_of_year(self.year, self.month, self.day) return self._get_field('woy') weekofyear = week @@ -717,7 +721,7 @@ class Timestamp(_Timestamp): @property def days_in_month(self): - return self._get_field('dim') + return ccalendar.get_days_in_month(self.year, self.month) daysinmonth = days_in_month @@ -731,6 +735,10 @@ class Timestamp(_Timestamp): @property def is_month_end(self): + if self.freq is None: + # fastpath for non-business + return self.day == ccalendar.get_days_in_month(self.year, + self.month) return self._get_start_end_field('is_month_end') @property diff --git a/setup.py b/setup.py index 54221ec228325..d7b231418bfde 100755 --- a/setup.py +++ b/setup.py @@ -624,6 +624,7 @@ def pxd(name): '_libs.tslibs.timestamps': { 'pyxfile': '_libs/tslibs/timestamps', 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/ccalendar', '_libs/tslibs/conversion', '_libs/tslibs/nattype', '_libs/tslibs/timedeltas', From d0f8b136c5b2c9411e4030129fc1b86bdeb99d0b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 28 Nov 2017 17:04:07 -0800 Subject: [PATCH 19/21] fixup comment typo --- pandas/_libs/tslibs/ccalendar.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 4110c664f3aa9..7a3a4dafeb2e7 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -26,9 +26,9 @@ cdef int32_t* days_per_month_array = [ cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] -# The first 12 entries give the month days elapsed as of the first of month N -# in non-leap years. The remaining 12 entries give the days elapsed in leap -# years. +# The first 13 entries give the month days elapsed as of the first of month N +# (or the total number of days in the year for N=13) in non-leap years. +# The remaining 13 entries give the days elapsed in leap years. cdef int32_t* _month_offset = [ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] From 0add03ef3b585315c22a557ad6aa919bdb58d2d8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 29 Nov 2017 09:30:39 -0800 Subject: [PATCH 20/21] remove monthrange per reviewer request --- pandas/_libs/tslibs/ccalendar.pyx | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 7a3a4dafeb2e7..a68ecbd2e8629 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -58,37 +58,6 @@ cpdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil: return days_per_month_array[12 * is_leapyear(year) + month - 1] -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef monthrange(int64_t year, Py_ssize_t month): - """ - Return tuple containing the weekday of the first day of the month and - the number of days in the month. - - Parameters - ---------- - year : int - month : int - - Returns - ------- - weekday : int - days_in_month : int - - Raises - ------ - ValueError if month is invalid - """ - cdef: - int32_t days - - if month < 1 or month > 12: - raise ValueError("bad month number 0; must be 1-12") - - days = get_days_in_month(year, month) - return (dayofweek(year, month, 1), days) - - @cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision From b66caa6781a30a9cdeac9bee2217c8e92d3050a6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 29 Nov 2017 10:14:42 -0800 Subject: [PATCH 21/21] remove monthrange from pxd --- pandas/_libs/tslibs/ccalendar.pxd | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 58fb08495de5f..a1bbeea1cb69a 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -6,8 +6,6 @@ from cython cimport Py_ssize_t from numpy cimport int64_t, int32_t -cpdef monthrange(int64_t year, Py_ssize_t month) - cdef int dayofweek(int y, int m, int m) nogil cdef bint is_leapyear(int64_t year) nogil cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil