diff --git a/ci/lint.sh b/ci/lint.sh index 49bf9a690b990..80a4c11e74696 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -75,7 +75,7 @@ if [ "$LINT" ]; then # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, # we can lint all header files since they aren't "generated" like C files are. echo "Linting *.c and *.h" - for path in '*.h' 'period_helper.c' 'datetime' 'parser' 'ujson' + for path in '*.h' 'datetime' 'parser' 'ujson' do echo "linting -> pandas/_libs/src/$path" cpplint --quiet --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/_libs/src/$path diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c deleted file mode 100644 index f1367978bd6c9..0000000000000 --- a/pandas/_libs/src/period_helper.c +++ /dev/null @@ -1,1486 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. - -Borrowed and derived code from scikits.timeseries that we will expose via -Cython to pandas. This primarily concerns interval representation and -frequency conversion routines. - -See end of file for stuff pandas uses (search for 'pandas'). 
-*/ - -#include "period_helper.h" - -/* ------------------------------------------------------------------ - * Code derived from scikits.timeseries - * ------------------------------------------------------------------*/ - -static int mod_compat(int x, int m) { - int result = x % m; - if (result < 0) return result + m; - return result; -} - -static int floordiv(int x, int divisor) { - if (x < 0) { - if (mod_compat(x, divisor)) { - return x / divisor - 1; - } else { - return x / divisor; - } - } else { - return x / divisor; - } -} - -/* Table with day offsets for each month (0-based, without and with leap) */ -static int month_offset[2][13] = { - {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, - {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}}; - -/* Table of number of days in a month (0-based, without and with leap) */ -static int days_in_month[2][12] = { - {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, - {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; - -/* Return 1/0 iff year points to a leap year in calendar. */ -static int dInfoCalc_Leapyear(npy_int64 year, int calendar) { - if (calendar == GREGORIAN_CALENDAR) { - return (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); - } else { - return (year % 4 == 0); - } -} - -/* Return the day of the week for the given absolute date. */ -static int dInfoCalc_DayOfWeek(npy_int64 absdate) { - int day_of_week; - - if (absdate >= 1) { - day_of_week = (absdate - 1) % 7; - } else { - day_of_week = 6 - ((-absdate) % 7); - } - return day_of_week; -} - -static int monthToQuarter(int month) { return ((month - 1) / 3) + 1; } - -/* Return the year offset, that is the absolute date of the day - 31.12.(year-1) in the given calendar. - - Note: - For the Julian calendar we shift the absdate (which is measured - using the Gregorian Epoch) value by two days because the Epoch - (0001-01-01) in the Julian calendar lies 2 days before the Epoch in - the Gregorian calendar. 
*/ -static int dInfoCalc_YearOffset(npy_int64 year, int calendar) { - year--; - if (calendar == GREGORIAN_CALENDAR) { - if (year >= 0 || -1 / 4 == -1) - return year * 365 + year / 4 - year / 100 + year / 400; - else - return year * 365 + (year - 3) / 4 - (year - 99) / 100 + - (year - 399) / 400; - } else if (calendar == JULIAN_CALENDAR) { - if (year >= 0 || -1 / 4 == -1) - return year * 365 + year / 4 - 2; - else - return year * 365 + (year - 3) / 4 - 2; - } - Py_Error(PyExc_ValueError, "unknown calendar"); -onError: - return INT_ERR_CODE; -} - -/* Set the instance's value using the given date and time. calendar may be set - * to the flags: GREGORIAN_CALENDAR, JULIAN_CALENDAR to indicate the calendar - * to be used. */ - -static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo, int year, - int month, int day, int hour, - int minute, double second, - int calendar) { - /* Calculate the absolute date */ - { - int leap; - npy_int64 absdate; - int yearoffset; - - /* Range check */ - Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366), - PyExc_ValueError, "year out of range: %i", year); - - /* Is it a leap year ? 
*/ - leap = dInfoCalc_Leapyear(year, calendar); - - /* Negative month values indicate months relative to the years end */ - if (month < 0) month += 13; - Py_AssertWithArg(month >= 1 && month <= 12, PyExc_ValueError, - "month out of range (1-12): %i", month); - - /* Negative values indicate days relative to the months end */ - if (day < 0) day += days_in_month[leap][month - 1] + 1; - Py_AssertWithArg(day >= 1 && day <= days_in_month[leap][month - 1], - PyExc_ValueError, "day out of range: %i", day); - - yearoffset = dInfoCalc_YearOffset(year, calendar); - if (yearoffset == INT_ERR_CODE) goto onError; - - absdate = day + month_offset[leap][month - 1] + yearoffset; - - dinfo->absdate = absdate; - - dinfo->year = year; - dinfo->month = month; - dinfo->quarter = ((month - 1) / 3) + 1; - dinfo->day = day; - - dinfo->day_of_week = dInfoCalc_DayOfWeek(absdate); - dinfo->day_of_year = (short)(absdate - yearoffset); - - dinfo->calendar = calendar; - } - - /* Calculate the absolute time */ - { - Py_AssertWithArg(hour >= 0 && hour <= 23, PyExc_ValueError, - "hour out of range (0-23): %i", hour); - Py_AssertWithArg(minute >= 0 && minute <= 59, PyExc_ValueError, - "minute out of range (0-59): %i", minute); - Py_AssertWithArg( - second >= (double)0.0 && - (second < (double)60.0 || - (hour == 23 && minute == 59 && second < (double)61.0)), - PyExc_ValueError, - "second out of range (0.0 - <60.0; <61.0 for 23:59): %f", second); - - dinfo->abstime = (double)(hour * 3600 + minute * 60) + second; - - dinfo->hour = hour; - dinfo->minute = minute; - dinfo->second = second; - } - return 0; - -onError: - return INT_ERR_CODE; -} - -/* Sets the date part of the date_info struct using the indicated - calendar. - - XXX This could also be done using some integer arithmetics rather - than with this iterative approach... 
*/ -static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, - npy_int64 absdate, int calendar) { - register npy_int64 year; - npy_int64 yearoffset; - int leap, dayoffset; - int *monthoffset; - - /* Approximate year */ - if (calendar == GREGORIAN_CALENDAR) { - year = (npy_int64)(((double)absdate) / 365.2425); - } else if (calendar == JULIAN_CALENDAR) { - year = (npy_int64)(((double)absdate) / 365.25); - } else { - Py_Error(PyExc_ValueError, "unknown calendar"); - } - - if (absdate > 0) year++; - - /* Apply corrections to reach the correct year */ - while (1) { - /* Calculate the year offset */ - yearoffset = dInfoCalc_YearOffset(year, calendar); - if (yearoffset == INT_ERR_CODE) goto onError; - - /* Backward correction: absdate must be greater than the - yearoffset */ - if (yearoffset >= absdate) { - year--; - continue; - } - - dayoffset = absdate - yearoffset; - leap = dInfoCalc_Leapyear(year, calendar); - - /* Forward correction: non leap years only have 365 days */ - if (dayoffset > 365 && !leap) { - year++; - continue; - } - break; - } - - dinfo->year = year; - dinfo->calendar = calendar; - - /* Now iterate to find the month */ - monthoffset = month_offset[leap]; - { - register int month; - - for (month = 1; month < 13; month++) { - if (monthoffset[month] >= dayoffset) break; - } - - dinfo->month = month; - dinfo->quarter = monthToQuarter(month); - dinfo->day = dayoffset - month_offset[leap][month - 1]; - } - - dinfo->day_of_week = dInfoCalc_DayOfWeek(absdate); - dinfo->day_of_year = dayoffset; - dinfo->absdate = absdate; - - return 0; - -onError: - return INT_ERR_CODE; -} - -/////////////////////////////////////////////// - -// frequency specific conversion routines -// each function must take an integer fromDate and -// a char relation ('S' or 'E' for 'START' or 'END') -/////////////////////////////////////////////////////////////////////// - -// helpers for frequency conversion routines // - -static int daytime_conversion_factors[][2] = { - 
{FR_DAY, 1}, {FR_HR, 24}, {FR_MIN, 60}, {FR_SEC, 60}, - {FR_MS, 1000}, {FR_US, 1000}, {FR_NS, 1000}, {0, 0}}; - -static npy_int64 **daytime_conversion_factor_matrix = NULL; - -PANDAS_INLINE int max_value(int a, int b) { return a > b ? a : b; } - -PANDAS_INLINE int min_value(int a, int b) { return a < b ? a : b; } - -PANDAS_INLINE int get_freq_group(int freq) { return (freq / 1000) * 1000; } - -PANDAS_INLINE int get_freq_group_index(int freq) { return freq / 1000; } - -static int calc_conversion_factors_matrix_size(void) { - int matrix_size = 0; - int index; - for (index = 0;; index++) { - int period_value = - get_freq_group_index(daytime_conversion_factors[index][0]); - if (period_value == 0) { - break; - } - matrix_size = max_value(matrix_size, period_value); - } - return matrix_size + 1; -} - -static void alloc_conversion_factors_matrix(int matrix_size) { - int row_index; - int column_index; - daytime_conversion_factor_matrix = - malloc(matrix_size * sizeof(**daytime_conversion_factor_matrix)); - for (row_index = 0; row_index < matrix_size; row_index++) { - daytime_conversion_factor_matrix[row_index] = - malloc(matrix_size * sizeof(**daytime_conversion_factor_matrix)); - for (column_index = 0; column_index < matrix_size; column_index++) { - daytime_conversion_factor_matrix[row_index][column_index] = 0; - } - } -} - -static npy_int64 calculate_conversion_factor(int start_value, int end_value) { - npy_int64 conversion_factor = 0; - int index; - for (index = 0;; index++) { - int freq_group = daytime_conversion_factors[index][0]; - - if (freq_group == 0) { - conversion_factor = 0; - break; - } - - if (freq_group == start_value) { - conversion_factor = 1; - } else { - conversion_factor *= daytime_conversion_factors[index][1]; - } - - if (freq_group == end_value) { - break; - } - } - return conversion_factor; -} - -static void populate_conversion_factors_matrix(void) { - int row_index_index; - int row_value, row_index; - int column_index_index; - int column_value, 
column_index; - - for (row_index_index = 0;; row_index_index++) { - row_value = daytime_conversion_factors[row_index_index][0]; - if (row_value == 0) { - break; - } - row_index = get_freq_group_index(row_value); - for (column_index_index = row_index_index;; column_index_index++) { - column_value = daytime_conversion_factors[column_index_index][0]; - if (column_value == 0) { - break; - } - column_index = get_freq_group_index(column_value); - - daytime_conversion_factor_matrix[row_index][column_index] = - calculate_conversion_factor(row_value, column_value); - } - } -} - -void initialize_daytime_conversion_factor_matrix() { - if (daytime_conversion_factor_matrix == NULL) { - int matrix_size = calc_conversion_factors_matrix_size(); - alloc_conversion_factors_matrix(matrix_size); - populate_conversion_factors_matrix(); - } -} - -PANDAS_INLINE npy_int64 get_daytime_conversion_factor(int from_index, - int to_index) { - return daytime_conversion_factor_matrix[min_value(from_index, to_index)] - [max_value(from_index, to_index)]; -} - -PANDAS_INLINE npy_int64 upsample_daytime(npy_int64 ordinal, - asfreq_info *af_info, int atEnd) { - if (atEnd) { - return (ordinal + 1) * af_info->intraday_conversion_factor - 1; - } else { - return ordinal * af_info->intraday_conversion_factor; - } -} - -PANDAS_INLINE npy_int64 downsample_daytime(npy_int64 ordinal, - asfreq_info *af_info, int atEnd) { - return ordinal / (af_info->intraday_conversion_factor); -} - -PANDAS_INLINE npy_int64 transform_via_day(npy_int64 ordinal, char relation, - asfreq_info *af_info, - freq_conv_func first_func, - freq_conv_func second_func) { - // printf("transform_via_day(%ld, %ld, %d)\n", ordinal, - // af_info->intraday_conversion_factor, - // af_info->intraday_conversion_upsample); - npy_int64 result; - - result = (*first_func)(ordinal, relation, af_info); - result = (*second_func)(result, relation, af_info); - - return result; -} - -static npy_int64 DtoB_weekday(npy_int64 absdate) { - return (((absdate) / 7) 
* 5) + (absdate) % 7 - BDAY_OFFSET; -} - -static npy_int64 DtoB_WeekendToMonday(npy_int64 absdate, int day_of_week) { - if (day_of_week > 4) { - // change to Monday after weekend - absdate += (7 - day_of_week); - } - return DtoB_weekday(absdate); -} - -static npy_int64 DtoB_WeekendToFriday(npy_int64 absdate, int day_of_week) { - if (day_of_week > 4) { - // change to friday before weekend - absdate -= (day_of_week - 4); - } - return DtoB_weekday(absdate); -} - -static npy_int64 absdate_from_ymd(int y, int m, int d) { - struct date_info tempDate; - if (dInfoCalc_SetFromDateAndTime(&tempDate, y, m, d, 0, 0, 0, - GREGORIAN_CALENDAR)) { - return INT_ERR_CODE; - } - return tempDate.absdate; -} - -//************ FROM DAILY *************** - -static npy_int64 asfreq_DTtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - ordinal = downsample_daytime(ordinal, af_info, 0); - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - if (dinfo.month > af_info->to_a_year_end) { - return (npy_int64)(dinfo.year + 1 - BASE_YEAR); - } else { - return (npy_int64)(dinfo.year - BASE_YEAR); - } -} - -static npy_int64 DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year, - int *quarter) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - if (af_info->to_q_year_end != 12) { - dinfo.month -= af_info->to_q_year_end; - if (dinfo.month <= 0) { - dinfo.month += 12; - } else { - dinfo.year += 1; - } - dinfo.quarter = monthToQuarter(dinfo.month); - } - - *year = dinfo.year; - *quarter = dinfo.quarter; - - return 0; -} - -static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - int year, quarter; - - ordinal = downsample_daytime(ordinal, af_info, 0); - - if (DtoQ_yq(ordinal, af_info, &year, &quarter) == INT_ERR_CODE) { - return INT_ERR_CODE; - } - - return (npy_int64)((year - BASE_YEAR) 
* 4 + quarter - 1); -} - -static npy_int64 asfreq_DTtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - - ordinal = downsample_daytime(ordinal, af_info, 0); - - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - return (npy_int64)((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1); -} - -static npy_int64 asfreq_DTtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal = downsample_daytime(ordinal, af_info, 0); - return (ordinal + ORD_OFFSET - (1 + af_info->to_week_end)) / 7 + 1 - - WEEK_OFFSET; -} - -static npy_int64 asfreq_DTtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - - ordinal = downsample_daytime(ordinal, af_info, 0); - - if (dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } -} - -// all intra day calculations are now done within one function -static npy_int64 asfreq_DownsampleWithinDay(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return downsample_daytime(ordinal, af_info, relation == 'E'); -} - -static npy_int64 asfreq_UpsampleWithinDay(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return upsample_daytime(ordinal, af_info, relation == 'E'); -} -//************ FROM BUSINESS *************** - -static npy_int64 asfreq_BtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal += BDAY_OFFSET; - ordinal = - (((ordinal - 1) / 5) * 7 + mod_compat(ordinal - 1, 5) + 1 - ORD_OFFSET); - - return upsample_daytime(ordinal, af_info, relation != 'S'); -} - -static npy_int64 asfreq_BtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoA); -} - -static 
npy_int64 asfreq_BtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_BtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_BtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, - asfreq_DTtoW); -} - -//************ FROM WEEKLY *************** - -static npy_int64 asfreq_WtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - ordinal += WEEK_OFFSET; - if (relation != 'S') { - ordinal += 1; - } - - ordinal = ordinal * 7 - 6 + af_info->from_week_end - ORD_OFFSET; - - if (relation != 'S') { - ordinal -= 1; - } - - return upsample_daytime(ordinal, af_info, relation != 'S'); -} - -static npy_int64 asfreq_WtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_WtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_WtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_WtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_WtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_WtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, 
dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -//************ FROM MONTHLY *************** -static void MtoD_ym(npy_int64 ordinal, int *y, int *m) { - *y = floordiv(ordinal, 12) + BASE_YEAR; - *m = mod_compat(ordinal, 12) + 1; -} - -static npy_int64 asfreq_MtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - npy_int64 absdate; - int y, m; - - if (relation == 'E') { - ordinal += 1; - } - MtoD_ym(ordinal, &y, &m); - if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) - return INT_ERR_CODE; - ordinal = absdate - ORD_OFFSET; - - if (relation == 'E') { - ordinal -= 1; - } - - return upsample_daytime(ordinal, af_info, relation != 'S'); -} - -static npy_int64 asfreq_MtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_MtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_MtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_MtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_MtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -//************ FROM QUARTERLY *************** - -static void QtoD_ym(npy_int64 ordinal, int *y, int *m, asfreq_info *af_info) { - *y = floordiv(ordinal, 4) + BASE_YEAR; - *m = mod_compat(ordinal, 4) * 3 + 1; - - if (af_info->from_q_year_end != 12) { - *m += af_info->from_q_year_end; - if (*m 
> 12) { - *m -= 12; - } else { - *y -= 1; - } - } -} - -static npy_int64 asfreq_QtoDT(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - npy_int64 absdate; - int y, m; - - if (relation == 'E') { - ordinal += 1; - } - - QtoD_ym(ordinal, &y, &m, af_info); - - if ((absdate = absdate_from_ymd(y, m, 1)) == INT_ERR_CODE) - return INT_ERR_CODE; - - if (relation == 'E') { - absdate -= 1; - } - - return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S'); -} - -static npy_int64 asfreq_QtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_QtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_QtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_QtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_QtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_QtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -//************ FROM ANNUAL *************** - -static npy_int64 asfreq_AtoDT(npy_int64 year, char relation, - asfreq_info *af_info) { - npy_int64 absdate; - int month = (af_info->from_a_year_end) % 12; - - // start from 1970 - year += BASE_YEAR; - - month += 1; - - if (af_info->from_a_year_end != 12) { - year -= 1; - } - - if (relation == 'E') { - year += 
1; - } - - absdate = absdate_from_ymd(year, month, 1); - - if (absdate == INT_ERR_CODE) { - return INT_ERR_CODE; - } - - if (relation == 'E') { - absdate -= 1; - } - - return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S'); -} - -static npy_int64 asfreq_AtoA(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoA); -} - -static npy_int64 asfreq_AtoQ(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoQ); -} - -static npy_int64 asfreq_AtoM(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoM); -} - -static npy_int64 asfreq_AtoW(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, - asfreq_DTtoW); -} - -static npy_int64 asfreq_AtoB(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - struct date_info dinfo; - if (dInfoCalc_SetFromAbsDate( - &dinfo, asfreq_AtoDT(ordinal, relation, af_info) + ORD_OFFSET, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - if (relation == 'S') { - return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week); - } else { - return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week); - } -} - -static npy_int64 nofunc(npy_int64 ordinal, char relation, - asfreq_info *af_info) { - return INT_ERR_CODE; -} -static npy_int64 no_op(npy_int64 ordinal, char relation, asfreq_info *af_info) { - return ordinal; -} - -// end of frequency specific conversion routines - -static int calc_a_year_end(int freq, int group) { - int result = (freq - group) % 12; - if (result == 0) { - return 12; - } else { - return result; - } -} - -static int calc_week_end(int freq, int group) { return freq - group; } - -void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) { - int fromGroup = 
get_freq_group(fromFreq); - int toGroup = get_freq_group(toFreq); - - af_info->intraday_conversion_factor = get_daytime_conversion_factor( - get_freq_group_index(max_value(fromGroup, FR_DAY)), - get_freq_group_index(max_value(toGroup, FR_DAY))); - - // printf("get_asfreq_info(%d, %d) %ld, %d\n", fromFreq, toFreq, - // af_info->intraday_conversion_factor, - // af_info->intraday_conversion_upsample); - - switch (fromGroup) { - case FR_WK: - af_info->from_week_end = calc_week_end(fromFreq, fromGroup); - break; - case FR_ANN: - af_info->from_a_year_end = calc_a_year_end(fromFreq, fromGroup); - break; - case FR_QTR: - af_info->from_q_year_end = calc_a_year_end(fromFreq, fromGroup); - break; - } - - switch (toGroup) { - case FR_WK: - af_info->to_week_end = calc_week_end(toFreq, toGroup); - break; - case FR_ANN: - af_info->to_a_year_end = calc_a_year_end(toFreq, toGroup); - break; - case FR_QTR: - af_info->to_q_year_end = calc_a_year_end(toFreq, toGroup); - break; - } -} - -freq_conv_func get_asfreq_func(int fromFreq, int toFreq) { - int fromGroup = get_freq_group(fromFreq); - int toGroup = get_freq_group(toFreq); - - if (fromGroup == FR_UND) { - fromGroup = FR_DAY; - } - - switch (fromGroup) { - case FR_ANN: - switch (toGroup) { - case FR_ANN: - return &asfreq_AtoA; - case FR_QTR: - return &asfreq_AtoQ; - case FR_MTH: - return &asfreq_AtoM; - case FR_WK: - return &asfreq_AtoW; - case FR_BUS: - return &asfreq_AtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_AtoDT; - - default: - return &nofunc; - } - - case FR_QTR: - switch (toGroup) { - case FR_ANN: - return &asfreq_QtoA; - case FR_QTR: - return &asfreq_QtoQ; - case FR_MTH: - return &asfreq_QtoM; - case FR_WK: - return &asfreq_QtoW; - case FR_BUS: - return &asfreq_QtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_QtoDT; - default: - return &nofunc; - } - - case FR_MTH: - 
switch (toGroup) { - case FR_ANN: - return &asfreq_MtoA; - case FR_QTR: - return &asfreq_MtoQ; - case FR_MTH: - return &no_op; - case FR_WK: - return &asfreq_MtoW; - case FR_BUS: - return &asfreq_MtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_MtoDT; - default: - return &nofunc; - } - - case FR_WK: - switch (toGroup) { - case FR_ANN: - return &asfreq_WtoA; - case FR_QTR: - return &asfreq_WtoQ; - case FR_MTH: - return &asfreq_WtoM; - case FR_WK: - return &asfreq_WtoW; - case FR_BUS: - return &asfreq_WtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_WtoDT; - default: - return &nofunc; - } - - case FR_BUS: - switch (toGroup) { - case FR_ANN: - return &asfreq_BtoA; - case FR_QTR: - return &asfreq_BtoQ; - case FR_MTH: - return &asfreq_BtoM; - case FR_WK: - return &asfreq_BtoW; - case FR_BUS: - return &no_op; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - return &asfreq_BtoDT; - default: - return &nofunc; - } - - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - switch (toGroup) { - case FR_ANN: - return &asfreq_DTtoA; - case FR_QTR: - return &asfreq_DTtoQ; - case FR_MTH: - return &asfreq_DTtoM; - case FR_WK: - return &asfreq_DTtoW; - case FR_BUS: - return &asfreq_DTtoB; - case FR_DAY: - case FR_HR: - case FR_MIN: - case FR_SEC: - case FR_MS: - case FR_US: - case FR_NS: - if (fromGroup > toGroup) { - return &asfreq_DownsampleWithinDay; - } else { - return &asfreq_UpsampleWithinDay; - } - default: - return &nofunc; - } - - default: - return &nofunc; - } -} - -double get_abs_time(int freq, npy_int64 date_ordinal, npy_int64 ordinal) { - // printf("get_abs_time %d %lld %lld\n", freq, date_ordinal, ordinal); - - int freq_index, day_index, base_index; - npy_int64 per_day, start_ord; - double unit, result; - - if (freq <= 
FR_DAY) { - return 0; - } - - freq_index = get_freq_group_index(freq); - day_index = get_freq_group_index(FR_DAY); - base_index = get_freq_group_index(FR_SEC); - - // printf(" indices: day %d, freq %d, base %d\n", day_index, freq_index, - // base_index); - - per_day = get_daytime_conversion_factor(day_index, freq_index); - unit = get_daytime_conversion_factor(freq_index, base_index); - - // printf(" per_day: %lld, unit: %f\n", per_day, unit); - - if (base_index < freq_index) { - unit = 1 / unit; - // printf(" corrected unit: %f\n", unit); - } - - start_ord = date_ordinal * per_day; - // printf("start_ord: %lld\n", start_ord); - result = (double)(unit * (ordinal - start_ord)); - // printf(" result: %f\n", result); - return result; -} - -/* Sets the time part of the DateTime object. */ -static int dInfoCalc_SetFromAbsTime(struct date_info *dinfo, double abstime) { - int inttime; - int hour, minute; - double second; - - inttime = (int)abstime; - hour = inttime / 3600; - minute = (inttime % 3600) / 60; - second = abstime - (double)(hour * 3600 + minute * 60); - - dinfo->hour = hour; - dinfo->minute = minute; - dinfo->second = second; - - dinfo->abstime = abstime; - - return 0; -} - -/* Set the instance's value using the given date and time. calendar - may be set to the flags: GREGORIAN_CALENDAR, JULIAN_CALENDAR to - indicate the calendar to be used. 
*/ -static int dInfoCalc_SetFromAbsDateTime(struct date_info *dinfo, - npy_int64 absdate, double abstime, - int calendar) { - /* Bounds check */ - Py_AssertWithArg(abstime >= 0.0 && abstime <= SECONDS_PER_DAY, - PyExc_ValueError, - "abstime out of range (0.0 - 86400.0): %f", abstime); - - /* Calculate the date */ - if (dInfoCalc_SetFromAbsDate(dinfo, absdate, calendar)) goto onError; - - /* Calculate the time */ - if (dInfoCalc_SetFromAbsTime(dinfo, abstime)) goto onError; - - return 0; -onError: - return INT_ERR_CODE; -} - -/* ------------------------------------------------------------------ - * New pandas API-helper code, to expose to cython - * ------------------------------------------------------------------*/ - -npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, - char relation) { - npy_int64 val; - freq_conv_func func; - asfreq_info finfo; - - func = get_asfreq_func(freq1, freq2); - - get_asfreq_info(freq1, freq2, &finfo); - - // printf("\n%x %d %d %ld %ld\n", func, freq1, freq2, - // finfo.intraday_conversion_factor, -finfo.intraday_conversion_factor); - - val = (*func)(period_ordinal, relation, &finfo); - - if (val == INT_ERR_CODE) { - // Py_Error(PyExc_ValueError, "Unable to convert to desired - // frequency."); - goto onError; - } - return val; -onError: - return INT_ERR_CODE; -} - -/* generate an ordinal in period space */ -npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, - int second, int microseconds, int picoseconds, - int freq) { - npy_int64 absdays, delta, seconds; - npy_int64 weeks, days; - npy_int64 ordinal, day_adj; - int freq_group, fmonth, mdiff; - freq_group = get_freq_group(freq); - - if (freq == FR_SEC || freq == FR_MS || freq == FR_US || freq == FR_NS) { - absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET); - seconds = - (npy_int64)(delta * 86400 + hour * 3600 + minute * 60 + second); - - switch (freq) { - case FR_MS: - return seconds * 1000 + microseconds / 1000; - 
- case FR_US: - return seconds * 1000000 + microseconds; - - case FR_NS: - return seconds * 1000000000 + microseconds * 1000 + - picoseconds / 1000; - } - - return seconds; - } - - if (freq == FR_MIN) { - absdays = absdate_from_ymd(year, month, day); - delta = (absdays - ORD_OFFSET); - return (npy_int64)(delta * 1440 + hour * 60 + minute); - } - - if (freq == FR_HR) { - if ((absdays = absdate_from_ymd(year, month, day)) == INT_ERR_CODE) { - goto onError; - } - delta = (absdays - ORD_OFFSET); - return (npy_int64)(delta * 24 + hour); - } - - if (freq == FR_DAY) { - return (npy_int64)(absdate_from_ymd(year, month, day) - ORD_OFFSET); - } - - if (freq == FR_UND) { - return (npy_int64)(absdate_from_ymd(year, month, day) - ORD_OFFSET); - } - - if (freq == FR_BUS) { - if ((days = absdate_from_ymd(year, month, day)) == INT_ERR_CODE) { - goto onError; - } - // calculate the current week assuming sunday as last day of a week - weeks = (days - BASE_WEEK_TO_DAY_OFFSET) / DAYS_PER_WEEK; - // calculate the current weekday (in range 1 .. 7) - delta = (days - BASE_WEEK_TO_DAY_OFFSET) % DAYS_PER_WEEK + 1; - // return the number of business days in full weeks plus the business - // days in the last - possible partial - week - return (npy_int64)(weeks * BUSINESS_DAYS_PER_WEEK) + - (delta <= BUSINESS_DAYS_PER_WEEK ? 
delta - : BUSINESS_DAYS_PER_WEEK + 1) - - BDAY_OFFSET; - } - - if (freq_group == FR_WK) { - if ((ordinal = (npy_int64)absdate_from_ymd(year, month, day)) == - INT_ERR_CODE) { - goto onError; - } - day_adj = freq - FR_WK; - return (ordinal - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET; - } - - if (freq == FR_MTH) { - return (year - BASE_YEAR) * 12 + month - 1; - } - - if (freq_group == FR_QTR) { - fmonth = freq - FR_QTR; - if (fmonth == 0) fmonth = 12; - - mdiff = month - fmonth; - if (mdiff < 0) mdiff += 12; - if (month >= fmonth) mdiff += 12; - - return (year - BASE_YEAR) * 4 + (mdiff - 1) / 3; - } - - if (freq_group == FR_ANN) { - fmonth = freq - FR_ANN; - if (fmonth == 0) fmonth = 12; - if (month <= fmonth) { - return year - BASE_YEAR; - } else { - return year - BASE_YEAR + 1; - } - } - - Py_Error(PyExc_RuntimeError, "Unable to generate frequency ordinal"); - -onError: - return INT_ERR_CODE; -} - -/* - Returns the proleptic Gregorian ordinal of the date, as an integer. - This corresponds to the number of days since Jan., 1st, 1AD. - When the instance has a frequency less than daily, the proleptic date - is calculated for the last day of the period. 
- */ - -npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq) { - asfreq_info af_info; - freq_conv_func toDaily = NULL; - - if (freq == FR_DAY) return period_ordinal + ORD_OFFSET; - - toDaily = get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, &af_info); - - return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET; -} - - -// function to generate a nice string representation of the period -// object, originally from DateObject_strftime - -char *c_strftime(struct date_info *tmp, char *fmt) { - struct tm c_date; - char *result; - struct date_info dinfo = *tmp; - int result_len = strlen(fmt) + 50; - - c_date.tm_sec = (int)dinfo.second; - c_date.tm_min = dinfo.minute; - c_date.tm_hour = dinfo.hour; - c_date.tm_mday = dinfo.day; - c_date.tm_mon = dinfo.month - 1; - c_date.tm_year = dinfo.year - 1900; - c_date.tm_wday = (dinfo.day_of_week + 1) % 7; - c_date.tm_yday = dinfo.day_of_year - 1; - c_date.tm_isdst = -1; - - result = malloc(result_len * sizeof(char)); - - strftime(result, result_len, fmt, &c_date); - - return result; -} - -int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year) { - asfreq_info af_info; - int qtr_freq; - npy_int64 daily_ord; - npy_int64 (*toDaily)(npy_int64, char, asfreq_info *) = NULL; - - toDaily = get_asfreq_func(freq, FR_DAY); - get_asfreq_info(freq, FR_DAY, &af_info); - - daily_ord = toDaily(ordinal, 'E', &af_info); - - if (get_freq_group(freq) == FR_QTR) { - qtr_freq = freq; - } else { - qtr_freq = FR_QTR; - } - get_asfreq_info(FR_DAY, qtr_freq, &af_info); - - if (DtoQ_yq(daily_ord, &af_info, year, quarter) == INT_ERR_CODE) return -1; - - return 0; -} - -static int _quarter_year(npy_int64 ordinal, int freq, int *year, int *quarter) { - asfreq_info af_info; - int qtr_freq; - - ordinal = get_python_ordinal(ordinal, freq) - ORD_OFFSET; - - if (get_freq_group(freq) == FR_QTR) - qtr_freq = freq; - else - qtr_freq = FR_QTR; - - get_asfreq_info(FR_DAY, qtr_freq, &af_info); - - if (DtoQ_yq(ordinal, &af_info, year, 
quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - - if ((qtr_freq % 1000) > 12) *year -= 1; - - return 0; -} - -static int _ISOWeek(struct date_info *dinfo) { - int week; - - /* Estimate */ - week = (dinfo->day_of_year - 1) - dinfo->day_of_week + 3; - if (week >= 0) week = week / 7 + 1; - - /* Verify */ - if (week < 0) { - /* The day lies in last week of the previous year */ - if ((week > -2) || (week == -2 && dInfoCalc_Leapyear(dinfo->year - 1, - dinfo->calendar))) - week = 53; - else - week = 52; - } else if (week == 53) { - /* Check if the week belongs to year or year+1 */ - if (31 - dinfo->day + dinfo->day_of_week < 3) { - week = 1; - } - } - - return week; -} - -int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo) { - npy_int64 absdate = get_python_ordinal(ordinal, freq); - double abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal); - - while (abstime < 0) { - abstime += 86400; - absdate -= 1; - } - while (abstime >= 86400) { - abstime -= 86400; - absdate += 1; - } - - if (dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime, - GREGORIAN_CALENDAR)) - return INT_ERR_CODE; - - return 0; -} - -int pyear(npy_int64 ordinal, int freq) { - struct date_info dinfo; - get_date_info(ordinal, freq, &dinfo); - return dinfo.year; -} - -int pqyear(npy_int64 ordinal, int freq) { - int year, quarter; - if (_quarter_year(ordinal, freq, &year, &quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - return year; -} - -int pquarter(npy_int64 ordinal, int freq) { - int year, quarter; - if (_quarter_year(ordinal, freq, &year, &quarter) == INT_ERR_CODE) - return INT_ERR_CODE; - return quarter; -} - -int pmonth(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.month; -} - -int pday(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day; -} - -int 
pweekday(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_week; -} - -int pday_of_week(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_week; -} - -int pday_of_year(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.day_of_year; -} - -int pweek(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return _ISOWeek(&dinfo); -} - -int phour(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.hour; -} - -int pminute(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return dinfo.minute; -} - -int psecond(npy_int64 ordinal, int freq) { - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - return (int)dinfo.second; -} - -int pdays_in_month(npy_int64 ordinal, int freq) { - int days; - struct date_info dinfo; - if (get_date_info(ordinal, freq, &dinfo) == INT_ERR_CODE) - return INT_ERR_CODE; - - days = days_in_month[dInfoCalc_Leapyear(dinfo.year, dinfo.calendar)] - [dinfo.month - 1]; - return days; -} diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h deleted file mode 100644 index 35dd20848a2ec..0000000000000 --- a/pandas/_libs/src/period_helper.h +++ /dev/null @@ -1,181 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. 
- -Borrowed and derived code from scikits.timeseries that we will expose via -Cython to pandas. This primarily concerns interval representation and -frequency conversion routines. -*/ - -#ifndef PANDAS__LIBS_SRC_PERIOD_HELPER_H_ -#define PANDAS__LIBS_SRC_PERIOD_HELPER_H_ - -#include -#include "headers/stdint.h" -#include "helper.h" -#include "limits.h" -#include "numpy/ndarraytypes.h" - -/* - * declarations from period here - */ - -#define GREGORIAN_CALENDAR 0 -#define JULIAN_CALENDAR 1 - -#define SECONDS_PER_DAY ((double)86400.0) - -#define Py_AssertWithArg(x, errortype, errorstr, a1) \ - { \ - if (!(x)) { \ - PyErr_Format(errortype, errorstr, a1); \ - goto onError; \ - } \ - } -#define Py_Error(errortype, errorstr) \ - { \ - PyErr_SetString(errortype, errorstr); \ - goto onError; \ - } - -/*** FREQUENCY CONSTANTS ***/ - -// HIGHFREQ_ORIG is the datetime ordinal from which to begin the second -// frequency ordinal sequence - -// #define HIGHFREQ_ORIG 62135683200LL -#define BASE_YEAR 1970 -#define ORD_OFFSET 719163LL // days until 1970-01-01 -#define BDAY_OFFSET 513689LL // days until 1970-01-01 -#define WEEK_OFFSET 102737LL -#define BASE_WEEK_TO_DAY_OFFSET \ - 1 // difference between day 0 and end of week in days -#define DAYS_PER_WEEK 7 -#define BUSINESS_DAYS_PER_WEEK 5 -#define HIGHFREQ_ORIG 0 // ORD_OFFSET * 86400LL // days until 1970-01-01 - -#define FR_ANN 1000 /* Annual */ -#define FR_ANNDEC FR_ANN /* Annual - December year end*/ -#define FR_ANNJAN 1001 /* Annual - January year end*/ -#define FR_ANNFEB 1002 /* Annual - February year end*/ -#define FR_ANNMAR 1003 /* Annual - March year end*/ -#define FR_ANNAPR 1004 /* Annual - April year end*/ -#define FR_ANNMAY 1005 /* Annual - May year end*/ -#define FR_ANNJUN 1006 /* Annual - June year end*/ -#define FR_ANNJUL 1007 /* Annual - July year end*/ -#define FR_ANNAUG 1008 /* Annual - August year end*/ -#define FR_ANNSEP 1009 /* Annual - September year end*/ -#define FR_ANNOCT 1010 /* Annual - October year end*/ 
-#define FR_ANNNOV 1011 /* Annual - November year end*/ - -/* The standard quarterly frequencies with various fiscal year ends - eg, Q42005 for Q@OCT runs Aug 1, 2005 to Oct 31, 2005 */ -#define FR_QTR 2000 /* Quarterly - December year end (default quarterly) */ -#define FR_QTRDEC FR_QTR /* Quarterly - December year end */ -#define FR_QTRJAN 2001 /* Quarterly - January year end */ -#define FR_QTRFEB 2002 /* Quarterly - February year end */ -#define FR_QTRMAR 2003 /* Quarterly - March year end */ -#define FR_QTRAPR 2004 /* Quarterly - April year end */ -#define FR_QTRMAY 2005 /* Quarterly - May year end */ -#define FR_QTRJUN 2006 /* Quarterly - June year end */ -#define FR_QTRJUL 2007 /* Quarterly - July year end */ -#define FR_QTRAUG 2008 /* Quarterly - August year end */ -#define FR_QTRSEP 2009 /* Quarterly - September year end */ -#define FR_QTROCT 2010 /* Quarterly - October year end */ -#define FR_QTRNOV 2011 /* Quarterly - November year end */ - -#define FR_MTH 3000 /* Monthly */ - -#define FR_WK 4000 /* Weekly */ -#define FR_WKSUN FR_WK /* Weekly - Sunday end of week */ -#define FR_WKMON 4001 /* Weekly - Monday end of week */ -#define FR_WKTUE 4002 /* Weekly - Tuesday end of week */ -#define FR_WKWED 4003 /* Weekly - Wednesday end of week */ -#define FR_WKTHU 4004 /* Weekly - Thursday end of week */ -#define FR_WKFRI 4005 /* Weekly - Friday end of week */ -#define FR_WKSAT 4006 /* Weekly - Saturday end of week */ - -#define FR_BUS 5000 /* Business days */ -#define FR_DAY 6000 /* Daily */ -#define FR_HR 7000 /* Hourly */ -#define FR_MIN 8000 /* Minutely */ -#define FR_SEC 9000 /* Secondly */ -#define FR_MS 10000 /* Millisecondly */ -#define FR_US 11000 /* Microsecondly */ -#define FR_NS 12000 /* Nanosecondly */ - -#define FR_UND -10000 /* Undefined */ - -#define INT_ERR_CODE INT32_MIN - -typedef struct asfreq_info { - int from_week_end; // day the week ends on in the "from" frequency - int to_week_end; // day the week ends on in the "to" frequency - - int 
from_a_year_end; // month the year ends on in the "from" frequency - int to_a_year_end; // month the year ends on in the "to" frequency - - int from_q_year_end; // month the year ends on in the "from" frequency - int to_q_year_end; // month the year ends on in the "to" frequency - - npy_int64 intraday_conversion_factor; -} asfreq_info; - -typedef struct date_info { - npy_int64 absdate; - double abstime; - - double second; - int minute; - int hour; - int day; - int month; - int quarter; - int year; - int day_of_week; - int day_of_year; - int calendar; -} date_info; - -typedef npy_int64 (*freq_conv_func)(npy_int64, char, asfreq_info *); - -/* - * new pandas API helper functions here - */ - -npy_int64 asfreq(npy_int64 period_ordinal, int freq1, int freq2, char relation); - -npy_int64 get_period_ordinal(int year, int month, int day, int hour, int minute, - int second, int microseconds, int picoseconds, - int freq); - -npy_int64 get_python_ordinal(npy_int64 period_ordinal, int freq); - -int get_date_info(npy_int64 ordinal, int freq, struct date_info *dinfo); -freq_conv_func get_asfreq_func(int fromFreq, int toFreq); -void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info); - -int pyear(npy_int64 ordinal, int freq); -int pqyear(npy_int64 ordinal, int freq); -int pquarter(npy_int64 ordinal, int freq); -int pmonth(npy_int64 ordinal, int freq); -int pday(npy_int64 ordinal, int freq); -int pweekday(npy_int64 ordinal, int freq); -int pday_of_week(npy_int64 ordinal, int freq); -int pday_of_year(npy_int64 ordinal, int freq); -int pweek(npy_int64 ordinal, int freq); -int phour(npy_int64 ordinal, int freq); -int pminute(npy_int64 ordinal, int freq); -int psecond(npy_int64 ordinal, int freq); -int pdays_in_month(npy_int64 ordinal, int freq); - -char *c_strftime(struct date_info *dinfo, char *fmt); -int get_yq(npy_int64 ordinal, int freq, int *quarter, int *year); - -void initialize_daytime_conversion_factor_matrix(void); - -#endif // PANDAS__LIBS_SRC_PERIOD_HELPER_H_ 
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e2caebe4c4afc..eb6d634ddf390 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -7,6 +7,10 @@ from cpython cimport ( PyObject_RichCompareBool, Py_EQ, Py_NE) +from libc.stdlib cimport malloc +from libc.time cimport strftime, tm +from libc.string cimport strlen + from numpy cimport int64_t, import_array, ndarray import numpy as np import_array() @@ -34,6 +38,7 @@ from timezones cimport is_utc, is_tzlocal, get_utcoffset, get_dst_info from timedeltas cimport delta_to_nanoseconds from ccalendar import MONTH_NUMBERS +from ccalendar cimport get_days_in_month from frequencies cimport (get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, get_rule_month) @@ -46,71 +51,92 @@ from pandas.tseries import offsets from pandas.tseries import frequencies -cdef extern from "period_helper.h": - ctypedef struct date_info: - int64_t absdate - double abstime - double second - int minute - int hour - int day - int month - int quarter - int year - int day_of_week - int day_of_year - int calendar - - ctypedef struct asfreq_info: - int from_week_end - int to_week_end - - int from_a_year_end - int to_a_year_end - - int from_q_year_end - int to_q_year_end - - ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) - - void initialize_daytime_conversion_factor_matrix() - int64_t asfreq(int64_t dtordinal, int freq1, int freq2, - char relation) except INT32_MIN - freq_conv_func get_asfreq_func(int fromFreq, int toFreq) - void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) - - int64_t get_period_ordinal(int year, int month, int day, - int hour, int minute, int second, - int microseconds, int picoseconds, - int freq) nogil except INT32_MIN - - int get_date_info(int64_t ordinal, int freq, - date_info *dinfo) nogil except INT32_MIN - - int pyear(int64_t ordinal, int freq) except INT32_MIN - int pqyear(int64_t ordinal, int freq) except INT32_MIN - 
int pquarter(int64_t ordinal, int freq) except INT32_MIN - int pmonth(int64_t ordinal, int freq) except INT32_MIN - int pday(int64_t ordinal, int freq) except INT32_MIN - int pweekday(int64_t ordinal, int freq) except INT32_MIN - int pday_of_week(int64_t ordinal, int freq) except INT32_MIN - # TODO: pday_of_week and pweekday are identical. Make one an alias instead - # of importing them separately. - int pday_of_year(int64_t ordinal, int freq) except INT32_MIN - int pweek(int64_t ordinal, int freq) except INT32_MIN - int phour(int64_t ordinal, int freq) except INT32_MIN - int pminute(int64_t ordinal, int freq) except INT32_MIN - int psecond(int64_t ordinal, int freq) except INT32_MIN - int pdays_in_month(int64_t ordinal, int freq) except INT32_MIN - char *c_strftime(date_info *dinfo, char *fmt) - int get_yq(int64_t ordinal, int freq, int *quarter, int *year) - -initialize_daytime_conversion_factor_matrix() +from period_asfreq cimport (asfreq_info, get_asfreq_info, + freq_conv_func, get_asfreq_func, DtoQ_yq, + get_python_ordinal, asfreq, get_date_info) +from period_info cimport (_ISOWeek, + CALENDARS, get_period_ordinal, date_info) +from period_asfreq cimport (pqyear, pquarter, pday_of_year, + pweek, pweekday, + pyear, pmonth, pday, phour, pminute, psecond, + pdays_in_month) +from period_conversion cimport get_freq_group + +cdef int GREGORIAN_CALENDAR = CALENDARS.GREGORIAN_CALENDAR +cdef int JULIAN_CALENDAR = CALENDARS.JULIAN_CALENDAR + +cdef enum OFFSETS: + ORD_OFFSET = 719163LL # days until 1970-01-01 + +cdef enum FREQS: + FR_ANN = 1000 # Annual + FR_QTR = 2000 # Quarterly + FR_MTH = 3000 # Monthly + FR_WK = 4000 # Weekly + FR_BUS = 5000 # Business days + FR_DAY = 6000 # Daily + FR_HR = 7000 # Hourly + FR_MIN = 8000 # Minutely + FR_SEC = 9000 # Secondly + FR_MS = 10000 # Millisecondly + FR_US = 11000 # Microsecondly + FR_NS = 12000 # Nanosecondly + FR_UND =-10000 # Undefined + + +@cython.cdivision +cdef char* c_strftime(date_info *dinfo, char *fmt): + cdef: + tm 
c_date + char *result + int result_len = strlen(fmt) + 50 + + c_date.tm_sec = dinfo.second + c_date.tm_min = dinfo.minute + c_date.tm_hour = dinfo.hour + c_date.tm_mday = dinfo.day + c_date.tm_mon = dinfo.month - 1 + c_date.tm_year = dinfo.year - 1900 + c_date.tm_wday = (dinfo.day_of_week + 1) % 7 + c_date.tm_yday = dinfo.day_of_year - 1 + c_date.tm_isdst = -1 + + result = malloc(result_len * sizeof(char)) + + strftime(result, result_len, fmt, &c_date) + + return result + + +cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year) nogil: + cdef: + asfreq_info af_info + int qtr_freq + int64_t daily_ord + # int64_t (*toDaily)(int64_t, char, asfreq_info *) nogil + + # toDaily = get_asfreq_func(freq, FR_DAY) + # get_asfreq_info(freq, FR_DAY, &af_info) + + # daily_ord = toDaily(ordinal, 'E', &af_info) + daily_ord = get_python_ordinal(ordinal, freq) - ORD_OFFSET + + if get_freq_group(freq) == FR_QTR: + qtr_freq = freq + else: + qtr_freq = FR_QTR + + get_asfreq_info(FR_DAY, qtr_freq, &af_info) + + if DtoQ_yq(daily_ord, &af_info, year, quarter) == INT32_MIN: + return -1 + + return 0 + # ---------------------------------------------------------------------- # Period logic - @cython.wraparound(False) @cython.boundscheck(False) def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): @@ -171,8 +197,7 @@ cdef char START = 'S' cdef char END = 'E' -cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, - bint end): +cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): """ Convert period ordinal from one frequency to another, and if upsampling, choose to use start ('S') or end ('E') of period. 
@@ -180,13 +205,13 @@ cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, cdef: int64_t retval - if period_ordinal == iNaT: + if ordinal == iNaT: return iNaT if end: - retval = asfreq(period_ordinal, freq1, freq2, END) + retval = asfreq(ordinal, freq1, freq2, END) else: - retval = asfreq(period_ordinal, freq1, freq2, START) + retval = asfreq(ordinal, freq1, freq2, START) if retval == INT32_MIN: raise ValueError('Frequency conversion failed') @@ -317,6 +342,7 @@ cdef list extra_fmts = [(b"%q", b"^`AB`^"), cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] + cdef object _period_strftime(int64_t value, int freq, object fmt): cdef: Py_ssize_t i @@ -367,19 +393,18 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): return result -# period accessors - -ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN +# -------------------------------------------------------------------- +# period accessors def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): cdef: Py_ssize_t i, sz ndarray[int64_t] out - accessor f + accessor func - f = _get_accessor_func(code) - if f is NULL: + func = _get_accessor_func(code) + if func is NULL: raise ValueError('Unrecognized period code: %d' % code) sz = len(arr) @@ -389,39 +414,44 @@ def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): if arr[i] == iNaT: out[i] = -1 continue - out[i] = f(arr[i], freq) + out[i] = func(arr[i], freq) return out +ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN + + cdef accessor _get_accessor_func(int code): if code == 0: - return &pyear + return pyear elif code == 1: - return &pqyear + return pqyear elif code == 2: - return &pquarter + return pquarter elif code == 3: - return &pmonth + return pmonth elif code == 4: - return &pday + return pday elif code == 5: - return &phour + return phour elif code == 6: - return &pminute + return pminute elif code == 7: - return &psecond + 
return psecond elif code == 8: - return &pweek + return pweek elif code == 9: - return &pday_of_year + return pday_of_year elif code == 10: - return &pweekday + return pweekday elif code == 11: - return &pdays_in_month + return pdays_in_month return NULL +# ----------------------------------------------------------------------- + def extract_ordinals(ndarray[object] values, freq): cdef: Py_ssize_t i, n = len(values) diff --git a/pandas/_libs/tslibs/period_asfreq.pxd b/pandas/_libs/tslibs/period_asfreq.pxd new file mode 100644 index 0000000000000..486bbbc293d83 --- /dev/null +++ b/pandas/_libs/tslibs/period_asfreq.pxd @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +from numpy cimport int64_t + +from period_info cimport date_info + + +ctypedef struct asfreq_info: + int from_week_end + int to_week_end + + int from_a_year_end + int to_a_year_end + + int from_q_year_end + int to_q_year_end + + int64_t intraday_conversion_factor + + +ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) nogil + +cdef freq_conv_func get_asfreq_func(int fromFreq, int toFreq) nogil +cdef int64_t DtoQ_yq(int64_t ordinal, asfreq_info *af_info, int *year, + int *quarter) nogil +cdef void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) nogil +cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil +cdef int64_t asfreq(int64_t ordinal, int freq1, int freq2, char relation) + +cdef int get_date_info(int64_t ordinal, int freq, + date_info *dinfo) nogil except -1 + +cdef int pqyear(int64_t ordinal, int freq) +cdef int pquarter(int64_t ordinal, int freq) +cdef int pday_of_year(int64_t ordinal, int freq) +cdef int pweek(int64_t ordinal, int freq) +cdef int pweekday(int64_t ordinal, int freq) +cdef int pyear(int64_t ordinal, int freq) +cdef int pmonth(int64_t ordinal, int freq) +cdef int 
pday(int64_t ordinal, int freq) +cdef int phour(int64_t ordinal, int freq) +cdef int pminute(int64_t ordinal, int freq) +cdef int psecond(int64_t ordinal, int freq) +cdef int pdays_in_month(int64_t ordinal, int freq) diff --git a/pandas/_libs/tslibs/period_asfreq.pyx b/pandas/_libs/tslibs/period_asfreq.pyx new file mode 100644 index 0000000000000..64395a25d5497 --- /dev/null +++ b/pandas/_libs/tslibs/period_asfreq.pyx @@ -0,0 +1,899 @@ +# -*- coding: utf-8 -*- +cimport cython + +import numpy as np +from numpy cimport int64_t + +from util cimport INT32_MIN + +from period_info cimport (dInfoCalc_SetFromAbsDateTime, + dInfoCalc_SetFromAbsDate, + dInfoCalc_Leapyear, + absdate_from_ymd, monthToQuarter, _ISOWeek) +from period_conversion cimport (get_daytime_conversion_factor, max_value, + get_abs_time, + get_freq_group, get_freq_group_index) + +# ---------------------------------------------------------------------- +# Constants + +cdef int BASE_YEAR = 1970 + +cdef enum CALENDARS: + GREGORIAN_CALENDAR = 1 + JULIAN_CALENDAR = 2 + +cdef enum OFFSETS: + ORD_OFFSET = 719163LL # days until 1970-01-01 + BDAY_OFFSET = 513689LL # days until 1970-01-01 + WEEK_OFFSET = 102737LL + +cdef enum FREQS: + FR_ANN = 1000 # Annual + FR_QTR = 2000 # Quarterly - December year end (default quarterly) + FR_MTH = 3000 # Monthly + FR_WK = 4000 # Weekly + FR_BUS = 5000 # Business days + FR_DAY = 6000 # Daily + FR_HR = 7000 # Hourly + FR_MIN = 8000 # Minutely + FR_SEC = 9000 # Secondly + FR_MS = 10000 # Millisecondly + FR_US = 11000 # Microsecondly + FR_NS = 12000 # Nanosecondly + FR_UND = -10000 # Undefined + +# Table of number of days in a month (0-based, without and with leap) +cdef int64_t[:, :] days_in_month = np.array( + # Windows builds seem to require super-explicit casting + [[val for val in row] for row in + [[31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], + [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]]], + dtype=np.int64) + + +# 
--------------------------------------------------------------- +# Code derived from scikits.timeseries + +@cython.cdivision +cdef int mod_compat(int x, int m) nogil: + cdef: + int result = x % m + if result < 0: + return result + m + return result + + +@cython.cdivision +cdef int floordiv(int x, int divisor) nogil: + if x < 0: + if mod_compat(x, divisor): + return x / divisor - 1 + else: + return x / divisor + else: + return x / divisor + + +# -------------------------------------------------------------------- +# Date Info Construction + +cdef int get_date_info(int64_t ordinal, int freq, + date_info *dinfo) nogil except -1: + cdef: + int64_t absdate = get_python_ordinal(ordinal, freq) + double abstime = get_abs_time(freq, absdate - ORD_OFFSET, ordinal) + + while abstime < 0: + abstime += 86400 + absdate -= 1 + + while abstime >= 86400: + abstime -= 86400 + absdate += 1 + + if dInfoCalc_SetFromAbsDateTime(dinfo, absdate, abstime, + GREGORIAN_CALENDAR): + return -1 + + return 0 + +# ---------------------------------------------------------------------- + +cdef int64_t get_python_ordinal(int64_t period_ordinal, int freq) nogil: + """ + Returns the proleptic Gregorian ordinal of the date, as an integer. + This corresponds to the number of days since Jan., 1st, 1AD. + When the instance has a frequency less than daily, the proleptic date + is calculated for the last day of the period. 
+ """ + cdef: + asfreq_info af_info + freq_conv_func toDaily = NULL + + if freq == FR_DAY: + return period_ordinal + ORD_OFFSET + + toDaily = get_asfreq_func(freq, FR_DAY) + get_asfreq_info(freq, FR_DAY, &af_info) + + return toDaily(period_ordinal, 'E', &af_info) + ORD_OFFSET + + +cdef void get_asfreq_info(int fromFreq, int toFreq, + asfreq_info *af_info) nogil: + cdef: + int fromGroup = get_freq_group(fromFreq) + int toGroup = get_freq_group(toFreq) + + af_info.intraday_conversion_factor = get_daytime_conversion_factor( + get_freq_group_index(max_value(fromGroup, FR_DAY)), + get_freq_group_index(max_value(toGroup, FR_DAY))) + + if fromGroup == FR_WK: + af_info.from_week_end = calc_week_end(fromFreq, fromGroup) + elif fromGroup == FR_ANN: + af_info.from_a_year_end = calc_a_year_end(fromFreq, fromGroup) + elif fromGroup == FR_QTR: + af_info.from_q_year_end = calc_a_year_end(fromFreq, fromGroup) + + if toGroup == FR_WK: + af_info.to_week_end = calc_week_end(toFreq, toGroup) + elif toGroup == FR_ANN: + af_info.to_a_year_end = calc_a_year_end(toFreq, toGroup) + elif toGroup == FR_QTR: + af_info.to_q_year_end = calc_a_year_end(toFreq, toGroup) + + +cdef int calc_week_end(int freq, int group) nogil: + return freq - group + + +@cython.cdivision +cdef int calc_a_year_end(int freq, int group) nogil: + cdef: + int result = (freq - group) % 12 + if result == 0: + return 12 + else: + return result + +# ---------------------------------------------------------------------- + +cdef int64_t asfreq(int64_t ordinal, int freq1, int freq2, char relation): + cdef: + int64_t val + freq_conv_func func + asfreq_info finfo + + func = get_asfreq_func(freq1, freq2) + + get_asfreq_info(freq1, freq2, &finfo) + val = func(ordinal, relation, &finfo) + + if val == INT32_MIN: + # // Py_Error(PyExc_ValueError, "Unable to convert to desired + # // frequency."); + return INT32_MIN + + return val + + +cdef freq_conv_func get_asfreq_func(int fromFreq, int toFreq) nogil: + cdef: + int fromGroup = 
get_freq_group(fromFreq) + int toGroup = get_freq_group(toFreq) + + if fromGroup == FR_UND: + fromGroup = FR_DAY + + if fromGroup == FR_ANN: + if toGroup == FR_ANN: + return asfreq_AtoA + elif toGroup == FR_QTR: + return asfreq_AtoQ + elif toGroup == FR_MTH: + return asfreq_AtoM + elif toGroup == FR_WK: + return asfreq_AtoW + elif toGroup == FR_BUS: + return asfreq_AtoB + elif toGroup in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_AtoDT + else: + return nofunc + + elif fromGroup == FR_QTR: + if toGroup == FR_ANN: + return asfreq_QtoA + elif toGroup == FR_QTR: + return asfreq_QtoQ + elif toGroup == FR_MTH: + return asfreq_QtoM + elif toGroup == FR_WK: + return asfreq_QtoW + elif toGroup == FR_BUS: + return asfreq_QtoB + elif toGroup in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_QtoDT + else: + return nofunc + + elif fromGroup == FR_MTH: + if toGroup == FR_ANN: + return asfreq_MtoA + elif toGroup == FR_QTR: + return asfreq_MtoQ + elif toGroup == FR_MTH: + return no_op + elif toGroup == FR_WK: + return asfreq_MtoW + elif toGroup == FR_BUS: + return asfreq_MtoB + elif toGroup in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_MtoDT + else: + return nofunc + + elif fromGroup == FR_WK: + if toGroup == FR_ANN: + return asfreq_WtoA + elif toGroup == FR_QTR: + return asfreq_WtoQ + elif toGroup == FR_MTH: + return asfreq_WtoM + elif toGroup == FR_WK: + return asfreq_WtoW + elif toGroup == FR_BUS: + return asfreq_WtoB + elif toGroup in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_WtoDT + else: + return nofunc + + elif fromGroup == FR_BUS: + if toGroup == FR_ANN: + return asfreq_BtoA + elif toGroup == FR_QTR: + return asfreq_BtoQ + elif toGroup == FR_MTH: + return asfreq_BtoM + elif toGroup == FR_WK: + return asfreq_BtoW + elif toGroup == FR_BUS: + return no_op + elif toGroup in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_BtoDT + else: + return nofunc + 
+ elif fromGroup in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if toGroup == FR_ANN: + return asfreq_DTtoA + elif toGroup == FR_QTR: + return asfreq_DTtoQ + elif toGroup == FR_MTH: + return asfreq_DTtoM + elif toGroup == FR_WK: + return asfreq_DTtoW + elif toGroup == FR_BUS: + return asfreq_DTtoB + elif toGroup in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if fromGroup > toGroup: + return asfreq_DownsampleWithinDay + else: + return asfreq_UpsampleWithinDay + else: + return nofunc + + else: + return nofunc + + +cdef int64_t nofunc(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return INT32_MIN  # failure sentinel; asfreq() checks for this value + + +cdef int64_t no_op(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return ordinal  # identity conversion: the ordinal is returned unchanged + + +# --------------------------------------------------------------- + +cdef int64_t DtoQ_yq(int64_t ordinal, asfreq_info *af_info, int *year, + int *quarter) nogil: + cdef: + date_info dinfo + + if dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + + if af_info.to_q_year_end != 12: + dinfo.month -= af_info.to_q_year_end + if dinfo.month <= 0: + dinfo.month += 12 + else: + dinfo.year += 1 + + dinfo.quarter = monthToQuarter(dinfo.month) + + year[0] = dinfo.year + quarter[0] = dinfo.quarter + return 0 + + +cdef inline int64_t transform_via_day(int64_t ordinal, char relation, + asfreq_info *af_info, + freq_conv_func first_func, + freq_conv_func second_func) nogil: + cdef: + int64_t result + + result = (first_func)(ordinal, relation, af_info) + result = (second_func)(result, relation, af_info) + + return result + + +cdef inline int64_t upsample_daytime(int64_t ordinal, + asfreq_info *af_info, int atEnd) nogil: + if atEnd: + return (ordinal + 1) * af_info.intraday_conversion_factor - 1 + else: + return ordinal * af_info.intraday_conversion_factor + + +@cython.cdivision +cdef inline int64_t downsample_daytime(int64_t ordinal, + asfreq_info *af_info, int atEnd) nogil: + return ordinal / 
af_info.intraday_conversion_factor + + +# ---------------------------------------------------------------------- +# From Annual + +@cython.cdivision +cdef int64_t asfreq_AtoDT(int64_t year, char relation, + asfreq_info *af_info) nogil: + cdef: + int64_t absdate + int month = (af_info.from_a_year_end) % 12 + + # start from 1970 + year += BASE_YEAR + + month += 1 + + if af_info.from_a_year_end != 12: + year -= 1 + + if relation == 'E': + year += 1 + + absdate = absdate_from_ymd(year, month, 1) + + if absdate == INT32_MIN: + return INT32_MIN + + if relation == 'E': + absdate -= 1 + + return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S') + + +cdef int64_t asfreq_AtoA(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_AtoQ(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_AtoM(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_AtoW(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_AtoDT, + asfreq_DTtoW) + + +cdef int64_t asfreq_AtoB(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + date_info dinfo + + if dInfoCalc_SetFromAbsDate(&dinfo, + asfreq_AtoDT(ordinal, relation, + af_info) + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + + if relation == 'S': + return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week) + else: + return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week) + + +@cython.cdivision +cdef int64_t DtoB_weekday(int64_t absdate) nogil: + return (((absdate) / 7) * 5) + (absdate) % 7 - BDAY_OFFSET + + +cdef int64_t DtoB_WeekendToMonday(int64_t absdate, int day_of_week) 
nogil: + if day_of_week > 4: + # change to Monday after weekend + absdate += (7 - day_of_week) + return DtoB_weekday(absdate) + + +cdef int64_t DtoB_WeekendToFriday(int64_t absdate, int day_of_week) nogil: + if day_of_week > 4: + # change to friday before weekend + absdate -= (day_of_week - 4) + + return DtoB_weekday(absdate) + + +# ---------------------------------------------------------------------- +# From Quarterly + +cdef void QtoD_ym(int64_t ordinal, int *y, int *m, asfreq_info *af_info) nogil: + y[0] = floordiv(ordinal, 4) + BASE_YEAR + m[0] = mod_compat(ordinal, 4) * 3 + 1 + + if af_info.from_q_year_end != 12: + m[0] += af_info.from_q_year_end + if m[0] > 12: + m[0] -= 12 + else: + y[0] -= 1 + + +cdef int64_t asfreq_QtoDT(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + int64_t absdate + int y, m + + if relation == 'E': + ordinal += 1 + + QtoD_ym(ordinal, &y, &m, af_info) + + absdate = absdate_from_ymd(y, m, 1) + if absdate == INT32_MIN: + return INT32_MIN + + if relation == 'E': + absdate -= 1 + + return upsample_daytime(absdate - ORD_OFFSET, af_info, relation != 'S') + + +cdef int64_t asfreq_QtoQ(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_QtoA(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_QtoM(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_QtoW(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_QtoDT, + asfreq_DTtoW) + + +cdef int64_t asfreq_QtoB(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + date_info dinfo + + if dInfoCalc_SetFromAbsDate(&dinfo, + 
asfreq_QtoDT(ordinal, relation, + af_info) + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + + if relation == 'S': + return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week) + else: + return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week) + + +# ---------------------------------------------------------------------- +# From Monthly + +cdef void MtoD_ym(int64_t ordinal, int *y, int *m) nogil: + y[0] = floordiv(ordinal, 12) + BASE_YEAR + m[0] = mod_compat(ordinal, 12) + 1 + + +cdef int64_t asfreq_MtoDT(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + int64_t absdate + int y, m + + if relation == 'E': + ordinal += 1 + + MtoD_ym(ordinal, &y, &m) + absdate = absdate_from_ymd(y, m, 1) + if absdate == INT32_MIN: + return INT32_MIN + + ordinal = absdate - ORD_OFFSET + + if relation == 'E': + ordinal -= 1 + + return upsample_daytime(ordinal, af_info, relation != 'S') + + +cdef int64_t asfreq_MtoA(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_MtoQ(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_MtoW(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_MtoDT, + asfreq_DTtoW) + + +cdef int64_t asfreq_MtoB(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + date_info dinfo + + if dInfoCalc_SetFromAbsDate(&dinfo, + asfreq_MtoDT(ordinal, relation, + af_info) + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + + if relation == 'S': + return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week) + else: + return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week) + + +# ---------------------------------------------------------------------- +# From Weekly + +cdef int64_t asfreq_WtoDT(int64_t 
ordinal, char relation, + asfreq_info *af_info) nogil: + ordinal += WEEK_OFFSET + if relation != 'S': + ordinal += 1 + + ordinal = ordinal * 7 - 6 + af_info.from_week_end - ORD_OFFSET + + if relation != 'S': + ordinal -= 1 + + return upsample_daytime(ordinal, af_info, relation != 'S') + + +cdef int64_t asfreq_WtoA(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_WtoQ(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_WtoM(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_WtoW(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_WtoDT, + asfreq_DTtoW) + + +cdef int64_t asfreq_WtoB(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + date_info dinfo + + if dInfoCalc_SetFromAbsDate(&dinfo, + asfreq_WtoDT(ordinal, relation, + af_info) + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + + if relation == 'S': + return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week) + else: + return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week) + + +# ---------------------------------------------------------------------- +# From Business-Freq + +@cython.cdivision +cdef int64_t asfreq_BtoDT(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + ordinal += BDAY_OFFSET + ordinal = (((ordinal - 1) / 5) * 7 + mod_compat(ordinal - 1, 5) + + 1 - ORD_OFFSET) + + return upsample_daytime(ordinal, af_info, relation != 'S') + + +cdef int64_t asfreq_BtoA(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, + asfreq_DTtoA) + + +cdef int64_t 
asfreq_BtoQ(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_BtoM(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_BtoW(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return transform_via_day(ordinal, relation, af_info, asfreq_BtoDT, + asfreq_DTtoW) + + +# ---------------------------------------------------------------------- +# From Daily + +cdef int64_t asfreq_DTtoA(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + date_info dinfo + + ordinal = downsample_daytime(ordinal, af_info, 0) + if dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + + if dinfo.month > af_info.to_a_year_end: + return (dinfo.year + 1 - BASE_YEAR) + else: + return (dinfo.year - BASE_YEAR) + + +cdef int64_t asfreq_DTtoQ(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + int year, quarter + + ordinal = downsample_daytime(ordinal, af_info, 0) + + if DtoQ_yq(ordinal, af_info, &year, &quarter) == INT32_MIN: + return INT32_MIN + + return ((year - BASE_YEAR) * 4 + quarter - 1) + + +cdef int64_t asfreq_DTtoM(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + cdef: + date_info dinfo + + ordinal = downsample_daytime(ordinal, af_info, 0) + + if dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + return ((dinfo.year - BASE_YEAR) * 12 + dinfo.month - 1) + + +@cython.cdivision +cdef int64_t asfreq_DTtoW(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + ordinal = downsample_daytime(ordinal, af_info, 0) + return ((ordinal + ORD_OFFSET - (1 + af_info.to_week_end)) / 7 + + 1 - WEEK_OFFSET) + + +cdef int64_t asfreq_DTtoB(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + 
cdef: + date_info dinfo + + ordinal = downsample_daytime(ordinal, af_info, 0) + + if dInfoCalc_SetFromAbsDate(&dinfo, ordinal + ORD_OFFSET, + GREGORIAN_CALENDAR): + return INT32_MIN + + if relation == 'S': + return DtoB_WeekendToFriday(dinfo.absdate, dinfo.day_of_week) + else: + return DtoB_WeekendToMonday(dinfo.absdate, dinfo.day_of_week) + + +# all intra day calculations are now done within one function +cdef int64_t asfreq_DownsampleWithinDay(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return downsample_daytime(ordinal, af_info, relation == 'E') + + +cdef int64_t asfreq_UpsampleWithinDay(int64_t ordinal, char relation, + asfreq_info *af_info) nogil: + return upsample_daytime(ordinal, af_info, relation == 'E') + + +# ---------------------------------------------------------------------- +# Period Accessors + +cdef int pqyear(int64_t ordinal, int freq): + cdef: + int year, quarter + if _quarter_year(ordinal, freq, &year, &quarter) == INT32_MIN: + return INT32_MIN + return year + + +cdef int pquarter(int64_t ordinal, int freq): + cdef: + int year, quarter + if _quarter_year(ordinal, freq, &year, &quarter) == INT32_MIN: + return INT32_MIN + return quarter + + +cdef int pday_of_year(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return dinfo.day_of_year + + +cdef int pweek(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return _ISOWeek(&dinfo) + + +cdef int pweekday(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return dinfo.day_of_week + + +cdef int pyear(int64_t ordinal, int freq): + """Year of the period; INT32_MIN if get_date_info returns INT32_MIN.""" + cdef: + date_info dinfo + + # Propagate the failure sentinel like the sibling accessors do, instead + # of reading dinfo.year from a struct that was never filled in. + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return dinfo.year + + +cdef int pmonth(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == 
INT32_MIN: + return INT32_MIN + return dinfo.month + + +cdef int pday(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return dinfo.day + + +cdef int phour(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return dinfo.hour + + +cdef int pminute(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return dinfo.minute + + +cdef int psecond(int64_t ordinal, int freq): + cdef: + date_info dinfo + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + return dinfo.second + + +@cython.boundscheck(False) +cdef int pdays_in_month(int64_t ordinal, int freq): + cdef: + date_info dinfo + int days + Py_ssize_t leap + + if get_date_info(ordinal, freq, &dinfo) == INT32_MIN: + return INT32_MIN + + leap = dInfoCalc_Leapyear(dinfo.year, dinfo.calendar) + days = days_in_month[leap][dinfo.month - 1] + return days + + +@cython.cdivision +cdef int _quarter_year(int64_t ordinal, int freq, int *year, int *quarter): + cdef: + asfreq_info af_info + int qtr_freq + + ordinal = get_python_ordinal(ordinal, freq) - ORD_OFFSET + + if get_freq_group(freq) == FR_QTR: + qtr_freq = freq + else: + qtr_freq = FR_QTR + + get_asfreq_info(FR_DAY, qtr_freq, &af_info) + + if DtoQ_yq(ordinal, &af_info, year, quarter) == INT32_MIN: + return INT32_MIN + + if (qtr_freq % 1000) > 12: + year[0] -= 1 + + return 0 diff --git a/pandas/_libs/tslibs/period_conversion.pxd b/pandas/_libs/tslibs/period_conversion.pxd new file mode 100644 index 0000000000000..ba328878a5840 --- /dev/null +++ b/pandas/_libs/tslibs/period_conversion.pxd @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- + +from numpy cimport int64_t + +cdef int64_t[:, :] daytime_conversion_factor_matrix +cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil +cdef int 
max_value(int a, int b) nogil +cdef int min_value(int a, int b) nogil +cdef double get_abs_time(int freq, int64_t date_ordinal, int64_t ordinal) nogil + +cdef int get_freq_group_index(int freq) nogil +cdef int get_freq_group(int freq) nogil \ No newline at end of file diff --git a/pandas/_libs/tslibs/period_conversion.pyx b/pandas/_libs/tslibs/period_conversion.pyx new file mode 100644 index 0000000000000..1c70eb1d74ed1 --- /dev/null +++ b/pandas/_libs/tslibs/period_conversion.pyx @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +# flake8: noqa:E241,E501 +cimport cython + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, int64_t +cnp.import_array() + + +cdef enum FREQS: + FR_ANN = 1000 # Annual + FR_QTR = 2000 # Quarterly - December year end (default quarterly) + FR_MTH = 3000 # Monthly + FR_WK = 4000 # Weekly + FR_BUS = 5000 # Business days + FR_DAY = 6000 # Daily + FR_HR = 7000 # Hourly + FR_MIN = 8000 # Minutely + FR_SEC = 9000 # Secondly + FR_MS = 10000 # Millisecondly + FR_US = 11000 # Microsecondly + FR_NS = 12000 # Nanosecondly + FR_UND = -10000 # Undefined + + +daytime_conversion_factors = [[FR_DAY, 1], [FR_HR, 24], + [FR_MIN, 60], [FR_SEC, 60], + [FR_MS, 1000], [FR_US, 1000], + [FR_NS, 1000], [0, 0]] + + +cdef int64_t[:, :] daytime_conversion_factor_matrix = np.array( + [[val for val in row] for row in + # Building on Windows seems to require super-explicit casting + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000], + [0, 0, 0, 0, 0, 0, 0, 1, 60, 3600, 3600000, 3600000000, 3600000000000], + [0, 0, 0, 0, 0, 0, 0, 0, 1, 60, 60000, 60000000, 60000000000], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1000, 1000000, 1000000000], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1000, 
1000000], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1000], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]], + dtype=np.int64) # noqa + + + + +@cython.boundscheck(False) +cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil: + cdef: + Py_ssize_t row, col + + row = min_value(from_index, to_index) + col = max_value(from_index, to_index) + return daytime_conversion_factor_matrix[row][col] + + +cdef inline int max_value(int a, int b) nogil: + if a > b: + return a + return b + + +cdef inline int min_value(int a, int b) nogil: + if a < b: + return a + return b + + +@cython.cdivision +cdef double get_abs_time(int freq, int64_t date_ordinal, + int64_t ordinal) nogil: + cdef: + int freq_index, day_index, base_index + int64_t per_day, start_ord + double unit, result + + if freq <= FR_DAY: + return 0 + + freq_index = get_freq_group_index(freq) + day_index = get_freq_group_index(FR_DAY) + base_index = get_freq_group_index(FR_SEC) + + per_day = get_daytime_conversion_factor(day_index, freq_index) + unit = get_daytime_conversion_factor(freq_index, base_index) + + if base_index < freq_index: + unit = 1 / unit + + start_ord = date_ordinal * per_day + result = (unit * (ordinal - start_ord)) + return result + + +# ---------------------------------------------------------------------- +# Conventions + +@cython.cdivision +cdef inline int get_freq_group_index(int freq) nogil: + return freq / 1000 + +@cython.cdivision +cdef inline int get_freq_group(int freq) nogil: + return (freq / 1000) * 1000 diff --git a/pandas/_libs/tslibs/period_info.pxd b/pandas/_libs/tslibs/period_info.pxd new file mode 100644 index 0000000000000..5d40bda3c9815 --- /dev/null +++ b/pandas/_libs/tslibs/period_info.pxd @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +import numpy as np +from numpy cimport int64_t + +from util cimport INT32_MIN + + +ctypedef struct date_info: + int64_t absdate + double abstime + double second + int minute + int hour + int day + int month + int quarter + int year + int 
day_of_week + int day_of_year + int calendar + + +cdef enum OFFSETS: + ORD_OFFSET = 719163LL # days until 1970-01-01 + BDAY_OFFSET = 513689LL # days until 1970-01-01 + WEEK_OFFSET = 102737LL + +cdef enum CALENDARS: + GREGORIAN_CALENDAR = 1 + JULIAN_CALENDAR = 2 + +cdef enum FREQS: + FR_ANN = 1000 # Annual + FR_ANNDEC = FR_ANN # Annual - December year end + FR_ANNJAN = 1001 # Annual - January year end + FR_ANNFEB = 1002 # Annual - February year end + FR_ANNMAR = 1003 # Annual - March year end + FR_ANNAPR = 1004 # Annual - April year end + FR_ANNMAY = 1005 # Annual - May year end + FR_ANNJUN = 1006 # Annual - June year end + FR_ANNJUL = 1007 # Annual - July year end + FR_ANNAUG = 1008 # Annual - August year end + FR_ANNSEP = 1009 # Annual - September year end + FR_ANNOCT = 1010 # Annual - October year end + FR_ANNNOV = 1011 # Annual - November year end + + # The standard quarterly frequencies with various fiscal year ends + # eg, Q42005 for Q@OCT runs Aug 1, 2005 to Oct 31, 2005 + FR_QTR = 2000 # Quarterly - December year end (default quarterly) + FR_QTRDEC = FR_QTR # Quarterly - December year end + FR_QTRJAN = 2001 # Quarterly - January year end + FR_QTRFEB = 2002 # Quarterly - February year end + FR_QTRMAR = 2003 # Quarterly - March year end + FR_QTRAPR = 2004 # Quarterly - April year end + FR_QTRMAY = 2005 # Quarterly - May year end + FR_QTRJUN = 2006 # Quarterly - June year end + FR_QTRJUL = 2007 # Quarterly - July year end + FR_QTRAUG = 2008 # Quarterly - August year end + FR_QTRSEP = 2009 # Quarterly - September year end + FR_QTROCT = 2010 # Quarterly - October year end + FR_QTRNOV = 2011 # Quarterly - November year end + + FR_MTH = 3000 # Monthly + + FR_WK = 4000 # Weekly + FR_WKSUN = FR_WK # Weekly - Sunday end of week + FR_WKMON = 4001 # Weekly - Monday end of week + FR_WKTUE = 4002 # Weekly - Tuesday end of week + FR_WKWED = 4003 # Weekly - Wednesday end of week + FR_WKTHU = 4004 # Weekly - Thursday end of week + FR_WKFRI = 4005 # Weekly - Friday end of 
week + FR_WKSAT = 4006 # Weekly - Saturday end of week + + FR_BUS = 5000 # Business days + FR_DAY = 6000 # Daily + FR_HR = 7000 # Hourly + FR_MIN = 8000 # Minutely + FR_SEC = 9000 # Secondly + FR_MS = 10000 # Millisecondly + FR_US = 11000 # Microsecondly + FR_NS = 12000 # Nanosecondly + + FR_UND = -10000 # Undefined + + +cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo, + int64_t absdate, double abstime, + int calendar) nogil except -1 +cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, + int64_t absdate, int calendar) nogil +cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil + +cdef int64_t absdate_from_ymd(int y, int m, int d) nogil +cdef int monthToQuarter(int month) nogil + +cdef int dInfoCalc_YearOffset(int64_t year, int calendar) nogil except? -1 +cdef int dInfoCalc_DayOfWeek(int64_t absdate) nogil +cdef bint dInfoCalc_Leapyear(int64_t year, int calendar) nogil +cdef int _ISOWeek(date_info *dinfo) + +cdef int64_t get_period_ordinal(int year, int month, int day, + int hour, int minute, int second, + int microseconds, int picoseconds, + int freq) nogil except INT32_MIN diff --git a/pandas/_libs/tslibs/period_info.pyx b/pandas/_libs/tslibs/period_info.pyx new file mode 100644 index 0000000000000..ef90fea380546 --- /dev/null +++ b/pandas/_libs/tslibs/period_info.pyx @@ -0,0 +1,433 @@ +# -*- coding: utf-8 -*- +cimport cython + +import numpy as np +from numpy cimport int64_t + +from util cimport INT32_MIN +from libc.limits cimport INT_MAX + +from period_conversion cimport get_freq_group + +cdef double SECONDS_PER_DAY = 86400 +cdef int BASE_YEAR = 1970 +cdef int BASE_WEEK_TO_DAY_OFFSET = 1 # diff between day 0 and end of week +cdef int DAYS_PER_WEEK = 7 +cdef int BUSINESS_DAYS_PER_WEEK = 5 + +# Table of number of days in a month (0-based, without and with leap) +cdef int64_t[:, :] days_in_month = np.array( + # Windows builds seem to require super-explicit casting + [[val for val in row] for row in + [[31, 28, 31, 30, 31, 30, 31, 31, 30, 
31, 30, 31], + [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]]], + dtype=np.int64) + +# Table with day offsets for each month (0-based, without and with leap) +cdef int64_t[:, :] month_offset = np.array( + [[val for val in row] for row in + [[0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365], + [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]]], + dtype=np.int64) + + +# ---------------------------------------------------------------------- +# ccalendar-like functions + +@cython.cdivision +cdef inline int monthToQuarter(int month) nogil: + return ((month - 1) / 3) + 1 + + +@cython.cdivision +cdef int dInfoCalc_YearOffset(int64_t year, int calendar) nogil except? -1: + """ + Return the year offset, that is the absolute date of the day + 31.12.(year-1) in the given calendar. + + Note: + For the Julian calendar we shift the absdate (which is measured + using the Gregorian Epoch) value by two days because the Epoch + (0001-01-01) in the Julian calendar lies 2 days before the Epoch in + the Gregorian calendar. 
+ """ + year -= 1 + if calendar == GREGORIAN_CALENDAR: + if year >= 0: # C division truncates toward zero; negative years take the floor-emulating branch + return year * 365 + year / 4 - year / 100 + year / 400 + else: + return (year * 365 + (year - 3) / 4 - + (year - 99) / 100 + (year - 399) / 400) + elif calendar == JULIAN_CALENDAR: + if year >= 0: # same reasoning as the Gregorian branch above + return year * 365 + year / 4 - 2 + else: + return year * 365 + (year - 3) / 4 - 2 + else: + return -1 + # raise ValueError("unknown calendar") + + +@cython.cdivision +cdef int dInfoCalc_DayOfWeek(int64_t absdate) nogil: + """Return the day of the week for the given absolute date""" + cdef: + int day_of_week + + if absdate >= 1: + day_of_week = (absdate - 1) % 7 + else: + day_of_week = 6 - ((-absdate) % 7) + return day_of_week + + +@cython.cdivision +cdef bint dInfoCalc_Leapyear(int64_t year, int calendar) nogil: + """ Return 1/0 iff year points to a leap year in calendar.""" + if calendar == GREGORIAN_CALENDAR: + return (year % 4 == 0) and ((year % 100 != 0) or (year % 400 == 0)) + else: + return (year % 4 == 0) + + +# ---------------------------------------------------------------------- + +@cython.cdivision +cdef int _ISOWeek(date_info *dinfo): + cdef: + int week + + # Estimate + week = (dinfo.day_of_year - 1) - dinfo.day_of_week + 3 + if week >= 0: + week = week / 7 + 1 + + # Verify + if week < 0: + # The day lies in last week of the previous year + if (week > -2) or (week == -2 and + dInfoCalc_Leapyear(dinfo.year - 1, dinfo.calendar)): + week = 53 + else: + week = 52 + elif week == 53: + # Check if the week belongs to year or year+1 + if (31 - dinfo.day + dinfo.day_of_week < 3): + week = 1 + + return week + + +cdef int dInfoCalc_SetFromAbsDateTime(date_info *dinfo, + int64_t absdate, double abstime, + int calendar) nogil except -1: + """ + Set the instance's value using the given date and time. 
calendar + may be set to the flags: GREGORIAN_CALENDAR, JULIAN_CALENDAR to + indicate the calendar to be used. + """ + # Bounds check + if not (abstime >= 0.0 and abstime <= SECONDS_PER_DAY): + return -1 + # Py_AssertWithArg(abstime >= 0.0 and abstime <= SECONDS_PER_DAY, + # PyExc_ValueError, + # "abstime out of range (0.0 - 86400.0): %f", abstime); + + # Calculate the date + if dInfoCalc_SetFromAbsDate(dinfo, absdate, calendar): + return -1 + + # Calculate the time + if dInfoCalc_SetFromAbsTime(dinfo, abstime): + return -1 + + return 0 + + +@cython.cdivision +cdef int dInfoCalc_SetFromAbsTime(date_info *dinfo, double abstime) nogil: + """Sets the time part of the DateTime object.""" + cdef: + int inttime + int hour, minute + double second + + inttime = abstime + hour = inttime / 3600 + minute = (inttime % 3600) / 60 + second = abstime - (hour * 3600 + minute * 60) + + dinfo.hour = hour + dinfo.minute = minute + dinfo.second = second + + dinfo.abstime = abstime + + return 0 + + +@cython.boundscheck(False) +@cython.cdivision +cdef int dInfoCalc_SetFromAbsDate(date_info *dinfo, + int64_t absdate, int calendar) nogil: + """ + Sets the date part of the date_info struct using the indicated + calendar. + + XXX This could also be done using some integer arithmetics rather + than with this iterative approach... 
+ """ + cdef: + int64_t year + int64_t yearoffset + int leap, dayoffset, month + int64_t[:] monthoffset + + # Approximate year + if calendar == GREGORIAN_CALENDAR: + year = ((absdate) / 365.2425) + elif calendar == JULIAN_CALENDAR: + year = ((absdate) / 365.25) + # else: + # Py_Error(PyExc_ValueError, "unknown calendar") + + if absdate > 0: + year += 1 + + # Apply corrections to reach the correct year + while True: + # Calculate the year offset + yearoffset = dInfoCalc_YearOffset(year, calendar) + if yearoffset == INT32_MIN: + return INT32_MIN + + # Backward correction: absdate must be greater than the yearoffset + if yearoffset >= absdate: + year -= 1 + continue + + dayoffset = absdate - yearoffset + leap = dInfoCalc_Leapyear(year, calendar) + + # Forward correction: non leap years only have 365 days + if dayoffset > 365 and not leap: + year += 1 + continue + + break + + dinfo.year = year + dinfo.calendar = calendar + + # Now iterate to find the month + monthoffset = month_offset[leap] + + for month in range(1, 13): + if monthoffset[month] >= dayoffset: + break + + dinfo.month = month + dinfo.quarter = monthToQuarter(month) + dinfo.day = dayoffset - month_offset[leap][month - 1] + + dinfo.day_of_week = dInfoCalc_DayOfWeek(absdate) + dinfo.day_of_year = dayoffset + dinfo.absdate = absdate + + return 0 + + +@cython.boundscheck(False) +@cython.cdivision +cdef int dInfoCalc_SetFromDateAndTime(date_info *dinfo, int year, + int month, int day, int hour, + int minute, double second, + int calendar) nogil: + """ + Fill dinfo from a date and time. Returns 0 on success and a nonzero value + on failure. Negative month/day values count back from the end of the + year/month. calendar is GREGORIAN_CALENDAR or JULIAN_CALENDAR. + """ + # Calculate the absolute date + cdef: + bint leap + int64_t absdate + int yearoffset + + # Range check: year must lie strictly inside +/-(INT_MAX / 366) + if not (year > -(INT_MAX / 366) and year < (INT_MAX / 366)): + return 1 + # raise ValueError("year out of range: %i" % year) + + # Is it a leap year? 
+ leap = dInfoCalc_Leapyear(year, calendar) + + # Negative month values indicate months relative to the years end + if month < 0: + month += 13 + + if not (month >= 1 and month <= 12): + return 1 + # raise ValueError("month out of range (1-12): %i" % month) + + # Negative values indicate days relative to the months end + if day < 0: + day += days_in_month[leap][month - 1] + 1 + + if not (day >= 1 and day <= days_in_month[leap][month - 1]): + return 1 + # raise ValueError("day out of range: %i" % day) + + yearoffset = dInfoCalc_YearOffset(year, calendar) + if yearoffset == INT32_MIN: + return INT32_MIN + + absdate = day + month_offset[leap][month - 1] + yearoffset + + dinfo.absdate = absdate + + dinfo.year = year + dinfo.month = month + dinfo.quarter = ((month - 1) / 3) + 1 + dinfo.day = day + + dinfo.day_of_week = dInfoCalc_DayOfWeek(absdate) + dinfo.day_of_year = (absdate - yearoffset) + + dinfo.calendar = calendar + + # Calculate the absolute time + if not (hour >= 0 and hour <= 23): + return 1 + # raise ValueError("hour out of range (0-23): %i" % hour) + if not (minute >= 0 and minute <= 59): + return 1 + # raise ValueError("minute out of range (0-59): %i" % minute) + if not (second >= 0.0 and + (second < 60.0 or + (hour == 23 and minute == 59 and second < 61.0))): + return 1 + # raise ValueError("second out of range (0.0 - <60.0; <61.0 for " + # "23:59): %f" % second) + + dinfo.abstime = (hour * 3600 + minute * 60) + second + + dinfo.hour = hour + dinfo.minute = minute + dinfo.second = second + return 0 + + +cdef int64_t absdate_from_ymd(int y, int m, int d) nogil: + cdef: + date_info tempDate + + if dInfoCalc_SetFromDateAndTime(&tempDate, y, m, d, 0, 0, 0, + GREGORIAN_CALENDAR): + return INT32_MIN + + return tempDate.absdate + + +@cython.cdivision +cdef int64_t get_period_ordinal(int year, int month, int day, + int hour, int minute, int second, + int microseconds, int picoseconds, + int freq) nogil except INT32_MIN: + """generate an ordinal in period space""" 
+ cdef: + int64_t absdays, delta, seconds + int64_t weeks, days + int64_t ordinal, day_adj + int freq_group, fmonth, mdiff + + freq_group = get_freq_group(freq) + + if freq == FR_SEC or freq == FR_MS or freq == FR_US or freq == FR_NS: + absdays = absdate_from_ymd(year, month, day) + delta = absdays - ORD_OFFSET + seconds = (delta * 86400 + hour * 3600 + minute * 60 + second) + + if freq == FR_MS: + return seconds * 1000 + microseconds / 1000 + + elif freq == FR_US: + return seconds * 1000000 + microseconds + + elif freq == FR_NS: + return (seconds * 1000000000 + + microseconds * 1000 + picoseconds / 1000) + + return seconds + + if freq == FR_MIN: + absdays = absdate_from_ymd(year, month, day) + delta = absdays - ORD_OFFSET + return (delta * 1440 + hour * 60 + minute) + + if freq == FR_HR: + absdays = absdate_from_ymd(year, month, day) + if absdays == INT32_MIN: + return INT32_MIN + + delta = (absdays - ORD_OFFSET) + return (delta * 24 + hour) + + if freq == FR_DAY: + return (absdate_from_ymd(year, month, day) - ORD_OFFSET) + + if freq == FR_UND: + return (absdate_from_ymd(year, month, day) - ORD_OFFSET) + + if freq == FR_BUS: + days = absdate_from_ymd(year, month, day) + if days == INT32_MIN: + return INT32_MIN + + # calculate the current week assuming sunday as last day of a week + weeks = (days - BASE_WEEK_TO_DAY_OFFSET) / DAYS_PER_WEEK + # calculate the current weekday (in range 1 .. 
7) + delta = (days - BASE_WEEK_TO_DAY_OFFSET) % DAYS_PER_WEEK + 1 + # return the number of business days in full weeks plus the business + # days in the last - possible partial - week + if delta <= BUSINESS_DAYS_PER_WEEK: + return ((weeks * BUSINESS_DAYS_PER_WEEK) + + delta - BDAY_OFFSET) + else: + return ((weeks * BUSINESS_DAYS_PER_WEEK) + + BUSINESS_DAYS_PER_WEEK + 1 - BDAY_OFFSET) + + if freq_group == FR_WK: + ordinal = absdate_from_ymd(year, month, day) + if ordinal == INT32_MIN: + return INT32_MIN + + day_adj = freq - FR_WK + return (ordinal - (1 + day_adj)) / 7 + 1 - WEEK_OFFSET + + if freq == FR_MTH: + return (year - BASE_YEAR) * 12 + month - 1 + + if freq_group == FR_QTR: + fmonth = freq - FR_QTR + if fmonth == 0: + fmonth = 12 + + mdiff = month - fmonth + if mdiff < 0: + mdiff += 12 + if month >= fmonth: + mdiff += 12 + + return (year - BASE_YEAR) * 4 + (mdiff - 1) / 3 + + if freq_group == FR_ANN: + fmonth = freq - FR_ANN + if fmonth == 0: + fmonth = 12 + if month <= fmonth: + return year - BASE_YEAR + else: + return year - BASE_YEAR + 1 + + # Py_Error(PyExc_RuntimeError, "Unable to generate frequency ordinal") diff --git a/setup.py b/setup.py index 27943a776c414..305b26e3e6351 100755 --- a/setup.py +++ b/setup.py @@ -233,14 +233,12 @@ def initialize_options(self): base = pjoin('pandas', '_libs', 'src') dt = pjoin(base, 'datetime') - src = base util = pjoin('pandas', 'util') parser = pjoin(base, 'parser') ujson_python = pjoin(base, 'ujson', 'python') ujson_lib = pjoin(base, 'ujson', 'lib') self._clean_exclude = [pjoin(dt, 'np_datetime.c'), pjoin(dt, 'np_datetime_strings.c'), - pjoin(src, 'period_helper.c'), pjoin(parser, 'tokenizer.c'), pjoin(parser, 'io.c'), pjoin(ujson_python, 'ujson.c'), @@ -317,6 +315,9 @@ class CheckSDist(sdist_class): 'pandas/_libs/parsers.pyx', 'pandas/_libs/tslibs/ccalendar.pyx', 'pandas/_libs/tslibs/period.pyx', + 'pandas/_libs/tslibs/period_asfreq.pyx', + 'pandas/_libs/tslibs/period_info.pyx', + 
'pandas/_libs/tslibs/period_conversion.pyx', 'pandas/_libs/tslibs/strptime.pyx', 'pandas/_libs/tslibs/np_datetime.pyx', 'pandas/_libs/tslibs/timedeltas.pyx', @@ -516,9 +517,23 @@ def pxd(name): '_libs/missing', '_libs/tslibs/timedeltas', '_libs/tslibs/timezones', - '_libs/tslibs/nattype'], - 'depends': tseries_depends + ['pandas/_libs/src/period_helper.h'], - 'sources': np_datetime_sources + ['pandas/_libs/src/period_helper.c']}, + '_libs/tslibs/nattype', + '_libs/tslibs/period_asfreq', + '_libs/tslibs/period_info', + '_libs/tslibs/period_conversion'], + 'depends': tseries_depends, + 'sources': np_datetime_sources}, + '_libs.tslibs.period_asfreq': { + 'pyxfile': '_libs/tslibs/period_asfreq', + 'pxdfiles': ['_libs/tslibs/period_conversion', + '_libs/tslibs/period_info']}, + '_libs.tslibs.period_info': { + 'pyxfile': '_libs/tslibs/period_info', + 'pxdfiles': ['_libs/src/util', + '_libs/tslibs/period_conversion']}, + '_libs.tslibs.period_conversion': { + 'pyxfile': '_libs/tslibs/period_conversion', + 'pxdfiles': ['_libs/src/util']}, '_libs.properties': { 'pyxfile': '_libs/properties', 'include': []},