diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index 13fc7768c4b7f..09073dcd4d3b3 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -1,4 +1,4 @@ -from numpy cimport int64_t, int32_t, npy_int64, npy_int32 +from numpy cimport int64_t, int32_t, npy_int64, npy_int32, ndarray from cpython cimport PyObject from cpython cimport PyUnicode_Check, PyUnicode_AsASCIIString diff --git a/pandas/src/datetime.pyx b/pandas/src/datetime.pyx index 44660cd3bb682..ed956149280d7 100644 --- a/pandas/src/datetime.pyx +++ b/pandas/src/datetime.pyx @@ -5,6 +5,7 @@ import numpy as np from numpy cimport int32_t, int64_t, import_array, ndarray from cpython cimport * +from libc.stdlib cimport free # this is our datetime.pxd from datetime cimport * from util cimport is_integer_object, is_datetime64_object @@ -1603,3 +1604,401 @@ cpdef normalize_date(object dt): return datetime(dt.year, dt.month, dt.day) else: raise TypeError('Unrecognized type: %s' % type(dt)) + +cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, + int freq, object tz): + cdef: + Py_ssize_t n = len(stamps) + ndarray[int64_t] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans, deltas, pos + pandas_datetimestruct dts + + if not have_pytz: + raise Exception('Could not find pytz module') + + if _is_utc(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, freq) + + elif _is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, + &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + pandas_datetime_to_datetimestruct(stamps[i] + delta, + PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, freq) + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans = _get_transitions(tz) + deltas = _get_deltas(tz) + _pos = trans.searchsorted(stamps, side='right') - 1 + if _pos.dtype != np.int64: + _pos = _pos.astype(np.int64) + pos = _pos + + # statictzinfo + if not hasattr(tz, '_transition_info'): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i] + deltas[0], + PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, freq) + else: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + pandas_datetime_to_datetimestruct(stamps[i] + deltas[pos[i]], + PANDAS_FR_ns, &dts) + result[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, freq) + + return result + + +cdef extern from "period.h": + ctypedef struct date_info: + int64_t absdate + double abstime + double second + int minute + int hour + int day + int month + int quarter + int year + int day_of_week + int day_of_year + int calendar + + ctypedef struct asfreq_info: + int from_week_end + int to_week_end + + int from_a_year_end + int to_a_year_end + + int from_q_year_end + int to_q_year_end + + ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) + + int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except INT32_MIN + freq_conv_func get_asfreq_func(int fromFreq, int toFreq) + void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) + + int64_t get_period_ordinal(int year, int month, int day, + int hour, int minute, int second, + int freq) except INT32_MIN + + int64_t get_python_ordinal(int64_t period_ordinal, int freq) except INT32_MIN + + int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except INT32_MIN + double getAbsTime(int, int64_t, int64_t) + + int pyear(int64_t ordinal, int freq) except INT32_MIN + int pqyear(int64_t ordinal, int freq) except INT32_MIN + int pquarter(int64_t ordinal, int freq) except INT32_MIN + int pmonth(int64_t ordinal, int freq) except INT32_MIN + int pday(int64_t ordinal, int freq) except INT32_MIN + int pweekday(int64_t ordinal, int freq) except INT32_MIN + int pday_of_week(int64_t ordinal, int freq) except INT32_MIN + int pday_of_year(int64_t ordinal, int freq) except INT32_MIN + int pweek(int64_t ordinal, int freq) except INT32_MIN + int phour(int64_t ordinal, int freq) except INT32_MIN + int pminute(int64_t ordinal, int freq) except INT32_MIN + int psecond(int64_t ordinal, int freq) except INT32_MIN + char *c_strftime(date_info *dinfo, char *fmt) + int get_yq(int64_t ordinal, int freq, int *quarter, int *year) + +# Period logic +#---------------------------------------------------------------------- + +cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): + """ + Get freq+multiple ordinal value from corresponding freq-only ordinal value. + For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to + integer). + """ + if mult == 1: + return period_ord + + return (period_ord - 1) // mult + +cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): + """ + Get freq-only ordinal value from corresponding freq+multiple ordinal. + """ + if mult == 1: + return period_ord_w_mult + + return period_ord_w_mult * mult + 1; + +def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): + """ + Convert array of datetime64 values (passed in as 'i8' dtype) to a set of + periods corresponding to desired frequency, per period convention. + """ + cdef: + ndarray[int64_t] out + Py_ssize_t i, l + pandas_datetimestruct dts + + l = len(dtarr) + + out = np.empty(l, dtype='i8') + + if tz is None: + for i in range(l): + pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts) + out[i] = get_period_ordinal(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, freq) + else: + out = localize_dt64arr_to_period(dtarr, freq, tz) + return out + +def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): + """ + Convert array to datetime64 values from a set of ordinals corresponding to + periods per period convention. + """ + cdef: + ndarray[int64_t] out + Py_ssize_t i, l + + l = len(periodarr) + + out = np.empty(l, dtype='i8') + + for i in range(l): + out[i] = period_ordinal_to_dt64(periodarr[i], freq) + + return out + +cdef char START = 'S' +cdef char END = 'E' + +cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, + bint end): + """ + Convert period ordinal from one frequency to another, and if upsampling, + choose to use start ('S') or end ('E') of period. + """ + cdef: + int64_t retval + + if end: + retval = asfreq(period_ordinal, freq1, freq2, END) + else: + retval = asfreq(period_ordinal, freq1, freq2, START) + + if retval == INT32_MIN: + raise ValueError('Frequency conversion failed') + + return retval + +def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): + """ + Convert int64-array of period ordinals from one frequency to another, and + if upsampling, choose to use start ('S') or end ('E') of period. + """ + cdef: + ndarray[int64_t] result + Py_ssize_t i, n + freq_conv_func func + asfreq_info finfo + int64_t val, ordinal + char relation + + n = len(arr) + result = np.empty(n, dtype=np.int64) + + func = get_asfreq_func(freq1, freq2) + get_asfreq_info(freq1, freq2, &finfo) + + if end: + relation = END + else: + relation = START + + for i in range(n): + val = func(arr[i], relation, &finfo) + if val == INT32_MIN: + raise ValueError("Unable to convert to desired frequency.") + result[i] = val + + return result + +def period_ordinal(int y, int m, int d, int h, int min, int s, int freq): + cdef: + int64_t ordinal + + return get_period_ordinal(y, m, d, h, min, s, freq) + + +cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): + cdef: + pandas_datetimestruct dts + date_info dinfo + + get_date_info(ordinal, freq, &dinfo) + + dts.year = dinfo.year + dts.month = dinfo.month + dts.day = dinfo.day + dts.hour = dinfo.hour + dts.min = dinfo.minute + dts.sec = int(dinfo.second) + dts.us = dts.ps = 0 + + return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) + +def period_format(int64_t value, int freq, object fmt=None): + cdef: + int freq_group + + if fmt is None: + freq_group = (freq // 1000) * 1000 + if freq_group == 1000: # FR_ANN + fmt = b'%Y' + elif freq_group == 2000: # FR_QTR + fmt = b'%FQ%q' + elif freq_group == 3000: # FR_MTH + fmt = b'%Y-%m' + elif freq_group == 4000: # WK + left = period_asfreq(value, freq, 6000, 0) + right = period_asfreq(value, freq, 6000, 1) + return '%s/%s' % (period_format(left, 6000), + period_format(right, 6000)) + elif (freq_group == 5000 # BUS + or freq_group == 6000): # DAY + fmt = b'%Y-%m-%d' + elif freq_group == 7000: # HR + fmt = b'%Y-%m-%d %H:00' + elif freq_group == 8000: # MIN + fmt = b'%Y-%m-%d %H:%M' + elif freq_group == 9000: # SEC + fmt = b'%Y-%m-%d %H:%M:%S' + else: + raise ValueError('Unknown freq: %d' % freq) + + return _period_strftime(value, freq, fmt) + + +cdef list extra_fmts = [(b"%q", b"^`AB`^"), + (b"%f", b"^`CD`^"), + (b"%F", b"^`EF`^")] + +cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^"] + +cdef _period_strftime(int64_t value, int freq, object fmt): + cdef: + Py_ssize_t i + date_info dinfo + char *formatted + object pat, repl, result + list found_pat = [False] * len(extra_fmts) + int year, quarter + + if PyUnicode_Check(fmt): + fmt = fmt.encode('utf-8') + + get_date_info(value, freq, &dinfo) + for i in range(len(extra_fmts)): + pat = extra_fmts[i][0] + repl = extra_fmts[i][1] + if pat in fmt: + fmt = fmt.replace(pat, repl) + found_pat[i] = True + + formatted = c_strftime(&dinfo, fmt) + + result = util.char_to_string(formatted) + free(formatted) + + for i in range(len(extra_fmts)): + if found_pat[i]: + if get_yq(value, freq, &quarter, &year) < 0: + raise ValueError('Unable to get quarter and year') + + if i == 0: + repl = '%d' % quarter + elif i == 1: # %f, 2-digit year + repl = '%.2d' % (year % 100) + elif i == 2: + repl = '%d' % year + + result = result.replace(str_extra_fmts[i], repl) + + # Py3? + if not PyString_Check(result): + result = str(result) + + return result + +# period accessors + +ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN + +def get_period_field(int code, int64_t value, int freq): + cdef accessor f = _get_accessor_func(code) + return f(value, freq) + +def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): + cdef: + Py_ssize_t i, sz + ndarray[int64_t] out + accessor f + + f = _get_accessor_func(code) + + sz = len(arr) + out = np.empty(sz, dtype=np.int64) + + for i in range(sz): + out[i] = f(arr[i], freq) + + return out + + + +cdef accessor _get_accessor_func(int code): + if code == 0: + return &pyear + elif code == 1: + return &pqyear + elif code == 2: + return &pquarter + elif code == 3: + return &pmonth + elif code == 4: + return &pday + elif code == 5: + return &phour + elif code == 6: + return &pminute + elif code == 7: + return &psecond + elif code == 8: + return &pweek + elif code == 9: + return &pday_of_year + elif code == 10: + return &pweekday + else: + raise ValueError('Unrecognized code: %s' % code) diff --git a/pandas/src/plib.pyx b/pandas/src/plib.pyx deleted file mode 100644 index a6fe1b034bb5f..0000000000000 --- a/pandas/src/plib.pyx +++ /dev/null @@ -1,356 +0,0 @@ -# cython: profile=False - -cimport numpy as np -import numpy as np - -from numpy cimport int32_t, int64_t, import_array, ndarray -from cpython cimport * - -from libc.stdlib cimport free - -# this is our datetime.pxd -from datetime cimport * -from util cimport is_integer_object, is_datetime64_object - -from datetime import timedelta -from dateutil.parser import parse as parse_date -cimport util - -import cython - -# initialize numpy -import_array() - -# import datetime C API -PyDateTime_IMPORT - - -cdef extern from "period.h": - ctypedef struct date_info: - int64_t absdate - double abstime - double second - int minute - int hour - int day - int month - int quarter - int year - int day_of_week - int day_of_year - int calendar - - ctypedef struct asfreq_info: - int from_week_end - int to_week_end - - int from_a_year_end - int to_a_year_end - - int from_q_year_end - int to_q_year_end - - ctypedef int64_t (*freq_conv_func)(int64_t, char, asfreq_info*) - - int64_t asfreq(int64_t dtordinal, int freq1, int freq2, char relation) except INT32_MIN - freq_conv_func get_asfreq_func(int fromFreq, int toFreq) - void get_asfreq_info(int fromFreq, int toFreq, asfreq_info *af_info) - - int64_t get_period_ordinal(int year, int month, int day, - int hour, int minute, int second, - int freq) except INT32_MIN - - int64_t get_python_ordinal(int64_t period_ordinal, int freq) except INT32_MIN - - int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except INT32_MIN - double getAbsTime(int, int64_t, int64_t) - - int pyear(int64_t ordinal, int freq) except INT32_MIN - int pqyear(int64_t ordinal, int freq) except INT32_MIN - int pquarter(int64_t ordinal, int freq) except INT32_MIN - int pmonth(int64_t ordinal, int freq) except INT32_MIN - int pday(int64_t ordinal, int freq) except INT32_MIN - int pweekday(int64_t ordinal, int freq) except INT32_MIN - int pday_of_week(int64_t ordinal, int freq) except INT32_MIN - int pday_of_year(int64_t ordinal, int freq) except INT32_MIN - int pweek(int64_t ordinal, int freq) except INT32_MIN - int phour(int64_t ordinal, int freq) except INT32_MIN - int pminute(int64_t ordinal, int freq) except INT32_MIN - int psecond(int64_t ordinal, int freq) except INT32_MIN - char *c_strftime(date_info *dinfo, char *fmt) - int get_yq(int64_t ordinal, int freq, int *quarter, int *year) - -# Period logic -#---------------------------------------------------------------------- - -cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): - """ - Get freq+multiple ordinal value from corresponding freq-only ordinal value. - For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to - integer). - """ - if mult == 1: - return period_ord - - return (period_ord - 1) // mult - -cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): - """ - Get freq-only ordinal value from corresponding freq+multiple ordinal. - """ - if mult == 1: - return period_ord_w_mult - - return period_ord_w_mult * mult + 1; - -def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq): - """ - Convert array of datetime64 values (passed in as 'i8' dtype) to a set of - periods corresponding to desired frequency, per period convention. - """ - cdef: - ndarray[int64_t] out - Py_ssize_t i, l - pandas_datetimestruct dts - - l = len(dtarr) - - out = np.empty(l, dtype='i8') - - for i in range(l): - pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts) - out[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, freq) - return out - -def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq): - """ - Convert array to datetime64 values from a set of ordinals corresponding to - periods per period convention. - """ - cdef: - ndarray[int64_t] out - Py_ssize_t i, l - - l = len(periodarr) - - out = np.empty(l, dtype='i8') - - for i in range(l): - out[i] = period_ordinal_to_dt64(periodarr[i], freq) - - return out - -cdef char START = 'S' -cdef char END = 'E' - -cpdef int64_t period_asfreq(int64_t period_ordinal, int freq1, int freq2, - bint end): - """ - Convert period ordinal from one frequency to another, and if upsampling, - choose to use start ('S') or end ('E') of period. - """ - cdef: - int64_t retval - - if end: - retval = asfreq(period_ordinal, freq1, freq2, END) - else: - retval = asfreq(period_ordinal, freq1, freq2, START) - - if retval == INT32_MIN: - raise ValueError('Frequency conversion failed') - - return retval - -def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): - """ - Convert int64-array of period ordinals from one frequency to another, and - if upsampling, choose to use start ('S') or end ('E') of period. - """ - cdef: - ndarray[int64_t] result - Py_ssize_t i, n - freq_conv_func func - asfreq_info finfo - int64_t val, ordinal - char relation - - n = len(arr) - result = np.empty(n, dtype=np.int64) - - func = get_asfreq_func(freq1, freq2) - get_asfreq_info(freq1, freq2, &finfo) - - if end: - relation = END - else: - relation = START - - for i in range(n): - val = func(arr[i], relation, &finfo) - if val == INT32_MIN: - raise ValueError("Unable to convert to desired frequency.") - result[i] = val - - return result - -def period_ordinal(int y, int m, int d, int h, int min, int s, int freq): - cdef: - int64_t ordinal - - return get_period_ordinal(y, m, d, h, min, s, freq) - - -cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq): - cdef: - pandas_datetimestruct dts - date_info dinfo - - get_date_info(ordinal, freq, &dinfo) - - dts.year = dinfo.year - dts.month = dinfo.month - dts.day = dinfo.day - dts.hour = dinfo.hour - dts.min = dinfo.minute - dts.sec = int(dinfo.second) - dts.us = dts.ps = 0 - - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - -def period_format(int64_t value, int freq, object fmt=None): - cdef: - int freq_group - - if fmt is None: - freq_group = (freq // 1000) * 1000 - if freq_group == 1000: # FR_ANN - fmt = b'%Y' - elif freq_group == 2000: # FR_QTR - fmt = b'%FQ%q' - elif freq_group == 3000: # FR_MTH - fmt = b'%Y-%m' - elif freq_group == 4000: # WK - left = period_asfreq(value, freq, 6000, 0) - right = period_asfreq(value, freq, 6000, 1) - return '%s/%s' % (period_format(left, 6000), - period_format(right, 6000)) - elif (freq_group == 5000 # BUS - or freq_group == 6000): # DAY - fmt = b'%Y-%m-%d' - elif freq_group == 7000: # HR - fmt = b'%Y-%m-%d %H:00' - elif freq_group == 8000: # MIN - fmt = b'%Y-%m-%d %H:%M' - elif freq_group == 9000: # SEC - fmt = b'%Y-%m-%d %H:%M:%S' - else: - raise ValueError('Unknown freq: %d' % freq) - - return _period_strftime(value, freq, fmt) - - -cdef list extra_fmts = [(b"%q", b"^`AB`^"), - (b"%f", b"^`CD`^"), - (b"%F", b"^`EF`^")] - -cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^"] - -cdef _period_strftime(int64_t value, int freq, object fmt): - cdef: - Py_ssize_t i - date_info dinfo - char *formatted - object pat, repl, result - list found_pat = [False] * len(extra_fmts) - int year, quarter - - if PyUnicode_Check(fmt): - fmt = fmt.encode('utf-8') - - get_date_info(value, freq, &dinfo) - for i in range(len(extra_fmts)): - pat = extra_fmts[i][0] - repl = extra_fmts[i][1] - if pat in fmt: - fmt = fmt.replace(pat, repl) - found_pat[i] = True - - formatted = c_strftime(&dinfo, fmt) - - result = util.char_to_string(formatted) - free(formatted) - - for i in range(len(extra_fmts)): - if found_pat[i]: - if get_yq(value, freq, &quarter, &year) < 0: - raise ValueError('Unable to get quarter and year') - - if i == 0: - repl = '%d' % quarter - elif i == 1: # %f, 2-digit year - repl = '%.2d' % (year % 100) - elif i == 2: - repl = '%d' % year - - result = result.replace(str_extra_fmts[i], repl) - - # Py3? - if not PyString_Check(result): - result = str(result) - - return result - -# period accessors - -ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN - -def get_period_field(int code, int64_t value, int freq): - cdef accessor f = _get_accessor_func(code) - return f(value, freq) - -def get_period_field_arr(int code, ndarray[int64_t] arr, int freq): - cdef: - Py_ssize_t i, sz - ndarray[int64_t] out - accessor f - - f = _get_accessor_func(code) - - sz = len(arr) - out = np.empty(sz, dtype=np.int64) - - for i in range(sz): - out[i] = f(arr[i], freq) - - return out - - - -cdef accessor _get_accessor_func(int code): - if code == 0: - return &pyear - elif code == 1: - return &pqyear - elif code == 2: - return &pquarter - elif code == 3: - return &pmonth - elif code == 4: - return &pday - elif code == 5: - return &phour - elif code == 6: - return &pminute - elif code == 7: - return &psecond - elif code == 8: - return &pweek - elif code == 9: - return &pday_of_year - elif code == 10: - return &pweekday - else: - raise ValueError('Unrecognized code: %s' % code) - diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 92aeb1faf0ef5..789232359749b 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -702,7 +702,7 @@ def to_period(self, freq=None): if freq is None: freq = get_period_alias(self.freqstr) - return PeriodIndex(self.values, freq=freq) + return PeriodIndex(self.values, freq=freq, tz=self.tz) def order(self, return_indexer=False, ascending=True): """ diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 85b3654bac70a..02748b3c68cf6 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -15,7 +15,6 @@ from pandas.lib import Timestamp import pandas.lib as lib -import pandas._period as plib import pandas._algos as _algos @@ -25,7 +24,7 @@ def _period_field_accessor(name, alias): def f(self): base, mult = _gfc(self.freq) - return plib.get_period_field(alias, self.ordinal, base) + return lib.get_period_field(alias, self.ordinal, base) f.__name__ = name return property(f) @@ -33,7 +32,7 @@ def f(self): def _field_accessor(name, alias): def f(self): base, mult = _gfc(self.freq) - return plib.get_period_field_arr(alias, self.values, base) + return lib.get_period_field_arr(alias, self.values, base) f.__name__ = name return property(f) @@ -120,7 +119,7 @@ def __init__(self, value=None, freq=None, ordinal=None, raise ValueError('Only mult == 1 supported') if self.ordinal is None: - self.ordinal = plib.period_ordinal(dt.year, dt.month, dt.day, + self.ordinal = lib.period_ordinal(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, base) @@ -175,7 +174,7 @@ def asfreq(self, freq, how='E'): raise ValueError('Only mult == 1 supported') end = how == 'E' - new_ordinal = plib.period_asfreq(self.ordinal, base1, base2, end) + new_ordinal = lib.period_asfreq(self.ordinal, base1, base2, end) return Period(ordinal=new_ordinal, freq=base2) @@ -215,7 +214,7 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) val = self.asfreq(freq, how) - dt64 = plib.period_ordinal_to_dt64(val.ordinal, base) + dt64 = lib.period_ordinal_to_dt64(val.ordinal, base) return Timestamp(dt64) year = _period_field_accessor('year', 0) @@ -238,13 +237,13 @@ def now(cls, freq=None): def __repr__(self): base, mult = _gfc(self.freq) - formatted = plib.period_format(self.ordinal, base) + formatted = lib.period_format(self.ordinal, base) freqstr = _freq_mod._reverse_period_code_map[base] return "Period('%s', '%s')" % (formatted, freqstr) def __str__(self): base, mult = _gfc(self.freq) - formatted = plib.period_format(self.ordinal, base) + formatted = lib.period_format(self.ordinal, base) return ("%s" % formatted) def strftime(self, fmt): @@ -385,7 +384,7 @@ def strftime(self, fmt): 'Jan. 01, 2001 was a Monday' """ base, mult = _gfc(self.freq) - return plib.period_format(self.ordinal, base, fmt) + return lib.period_format(self.ordinal, base, fmt) def _get_date_and_freq(value, freq): @@ -421,12 +420,12 @@ def _get_ordinals(data, freq): return lib.map_infer(data, f) -def dt64arr_to_periodarr(data, freq): +def dt64arr_to_periodarr(data, freq, tz): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) base, mult = _gfc(freq) - return plib.dt64arr_to_periodarr(data.view('i8'), base) + return lib.dt64arr_to_periodarr(data.view('i8'), base, tz) # --- Period index sketch def _period_index_cmp(opname): @@ -494,6 +493,8 @@ class PeriodIndex(Int64Index): hour : int or array, default None minute : int or array, default None second : int or array, default None + tz : object, default None + Timezone for converting datetime64 data to Periods Examples -------- @@ -514,7 +515,8 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, copy=False, name=None, year=None, month=None, quarter=None, day=None, - hour=None, minute=None, second=None): + hour=None, minute=None, second=None, + tz=None): freq = _freq_mod.get_standard_freq(freq) @@ -531,9 +533,9 @@ def __new__(cls, data=None, ordinal=None, else: fields = [year, month, quarter, day, hour, minute, second] data, freq = cls._generate_range(start, end, periods, - freq, fields) + freq, fields) else: - ordinal, freq = cls._from_arraylike(data, freq) + ordinal, freq = cls._from_arraylike(data, freq, tz) data = np.array(ordinal, dtype=np.int64, copy=False) subarr = data.view(cls) @@ -562,7 +564,7 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq @classmethod - def _from_arraylike(cls, data, freq): + def _from_arraylike(cls, data, freq, tz): if not isinstance(data, np.ndarray): if np.isscalar(data) or isinstance(data, Period): raise ValueError('PeriodIndex() must be called with a ' @@ -598,7 +600,7 @@ def _from_arraylike(cls, data, freq): else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) - data = plib.period_asfreq_arr(data.values, base1, base2, 1) + data = lib.period_asfreq_arr(data.values, base1, base2, 1) else: if freq is None and len(data) > 0: freq = getattr(data[0], 'freq', None) @@ -608,7 +610,7 @@ def _from_arraylike(cls, data, freq): 'inferred from first element')) if np.issubdtype(data.dtype, np.datetime64): - data = dt64arr_to_periodarr(data, freq) + data = dt64arr_to_periodarr(data, freq, tz) elif data.dtype == np.int64: pass else: @@ -719,7 +721,7 @@ def asfreq(self, freq=None, how='E'): raise ValueError('Only mult == 1 supported') end = how == 'E' - new_data = plib.period_asfreq_arr(self.values, base1, base2, end) + new_data = lib.period_asfreq_arr(self.values, base1, base2, end) result = new_data.view(PeriodIndex) result.name = self.name @@ -786,7 +788,7 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - new_data = plib.periodarr_to_dt64arr(new_data.values, base) + new_data = lib.periodarr_to_dt64arr(new_data.values, base) return DatetimeIndex(new_data, freq='infer', name=self.name) def shift(self, n): @@ -1128,7 +1130,7 @@ def _range_from_fields(year=None, month=None, quarter=None, day=None, year, quarter = _make_field_arrays(year, quarter) for y, q in zip(year, quarter): y, m = _quarter_to_myear(y, q, freq) - val = plib.period_ordinal(y, m, 1, 1, 1, 1, base) + val = lib.period_ordinal(y, m, 1, 1, 1, 1, base) ordinals.append(val) else: base, mult = _gfc(freq) @@ -1137,7 +1139,7 @@ def _range_from_fields(year=None, month=None, quarter=None, day=None, arrays = _make_field_arrays(year, month, day, hour, minute, second) for y, mth, d, h, mn, s in zip(*arrays): - ordinals.append(plib.period_ordinal(y, mth, d, h, mn, s, base)) + ordinals.append(lib.period_ordinal(y, mth, d, h, mn, s, base)) return np.array(ordinals, dtype=np.int64), freq @@ -1166,7 +1168,7 @@ def _ordinal_from_fields(year, month, quarter, day, hour, minute, if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) - return plib.period_ordinal(year, month, day, hour, minute, second, base) + return lib.period_ordinal(year, month, day, hour, minute, second, base) def _quarter_to_myear(year, quarter, freq): @@ -1219,4 +1221,3 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): """ return PeriodIndex(start=start, end=end, periods=periods, freq=freq, name=name) - diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index ef35c44b53772..003e6f301935d 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1067,6 +1067,38 @@ def test_to_period(self): pts = ts.to_period('M') self.assert_(pts.index.equals(exp.index.asfreq('M'))) + def test_to_period_tz(self): + _skip_if_no_pytz() + from dateutil.tz import tzlocal + from pandas.tseries.period import period_range + from pytz import utc as UTC + + xp = date_range('1/1/2000', '4/1/2000').to_period() + + ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assert_(result == expected) + self.assert_(ts.to_period().equals(xp)) + + ts = date_range('1/1/2000', '4/1/2000', tz=UTC) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assert_(result == expected) + self.assert_(ts.to_period().equals(xp)) + + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + + result = ts.to_period()[0] + expected = ts[0].to_period() + + self.assert_(result == expected) + self.assert_(ts.to_period().equals(xp)) + def test_frame_to_period(self): K = 5 from pandas.tseries.period import period_range @@ -2205,7 +2237,8 @@ def test_series_set_value(self): def test_slice_locs_indexerror(self): times = [datetime(2000, 1, 1) + timedelta(minutes=i) for i in range(1000000)] s = Series(range(1000000), times) - s.ix[datetime(1900,1,1):datetime(2100,1,1)] + s.ix[datetime(1900,1,1) +:datetime(2100,1,1)] class TestSeriesDatetime64(unittest.TestCase): diff --git a/setup.py b/setup.py index ca152588b9554..07a172570df62 100755 --- a/setup.py +++ b/setup.py @@ -551,8 +551,6 @@ def run(self): 'reduce', 'stats', 'datetime', 'hashtable', 'inference', 'properties', 'join', 'engines'] -plib_depends = ['plib'] - def srcpath(name=None, suffix='.pyx', subdir='src'): return pjoin('pandas', subdir, name+suffix) @@ -560,9 +558,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): tseries_depends = [srcpath(f, suffix='.pyx') for f in tseries_depends] tseries_depends.append('pandas/src/util.pxd') - plib_depends = [srcpath(f, suffix='.pyx') - for f in plib_depends] - plib_depends.append('pandas/src/util.pxd') else: tseries_depends = [] plib_depends = [] @@ -577,7 +572,8 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): lib_depends = tseries_depends + ['pandas/src/numpy_helper.h', 'pandas/src/parse_helper.h', 'pandas/src/datetime/np_datetime.h', - 'pandas/src/datetime/np_datetime_strings.h'] + 'pandas/src/datetime/np_datetime_strings.h', + 'pandas/src/period.h'] # some linux distros require it libraries = ['m'] if 'win32' not in sys.platform else [] @@ -586,7 +582,8 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): depends=lib_depends, sources=[srcpath('tseries', suffix=suffix), 'pandas/src/datetime/np_datetime.c', - 'pandas/src/datetime/np_datetime_strings.c'], + 'pandas/src/datetime/np_datetime_strings.c', + 'pandas/src/period.c'], include_dirs=common_include, # pyrex_gdb=True, # extra_compile_args=['-Wconversion'] @@ -597,14 +594,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): include_dirs=[np.get_include()], libraries=libraries) -period_ext = Extension('pandas._period', - depends=plib_depends + ['pandas/src/numpy_helper.h', - 'pandas/src/period.h'], - sources=[srcpath('plib', suffix=suffix), - 'pandas/src/datetime/np_datetime.c', - 'pandas/src/period.c'], - include_dirs=[np.get_include()]) - parser_ext = Extension('pandas._parser', depends=['pandas/src/parser/tokenizer.h', 'pandas/src/parser/io.h', @@ -632,7 +621,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): extensions = [algos_ext, lib_ext, - period_ext, sparse_ext, pytables_ext, parser_ext]