From eb9fbc9204dae83f4f0aa09faef86dcd29a07dad Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 13 Sep 2017 18:19:44 -0700 Subject: [PATCH 1/5] Cut/paste (most) remaining tz funcs to tslibs/timezones --- pandas/_libs/period.pyx | 8 +- pandas/_libs/tslib.pxd | 2 - pandas/_libs/tslib.pyx | 190 ++------------------------- pandas/_libs/tslibs/timezones.pxd | 6 + pandas/_libs/tslibs/timezones.pyx | 207 +++++++++++++++++++++++++++++- 5 files changed, 223 insertions(+), 190 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 9e473a7f362b4..eef3c571ef37a 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -34,11 +34,9 @@ from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, NaT) -from tslibs.timezones cimport is_utc, is_tzlocal, get_utcoffset -from tslib cimport ( - maybe_get_tz, - _get_dst_info, - _nat_scalar_rules) +from tslibs.timezones cimport ( + is_utc, is_tzlocal, get_utcoffset, _get_dst_info, maybe_get_tz) +from tslib cimport _nat_scalar_rules from tslibs.frequencies cimport get_freq_code diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index c1b25963a6257..ee8adfe67bb5e 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -2,7 +2,5 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) -cpdef object maybe_get_tz(object) -cdef object _get_dst_info(object) cdef bint _nat_scalar_rules[6] cdef bint _check_all_nulls(obj) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 629325c28ea9c..d6527702b01e0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -73,19 +73,12 @@ import re # dateutil compat from dateutil.tz import (tzoffset, tzlocal as _dateutil_tzlocal, - tzfile as _dateutil_tzfile, tzutc as _dateutil_tzutc, tzstr as _dateutil_tzstr) -from pandas.compat import 
is_platform_windows -if is_platform_windows(): - from dateutil.zoneinfo import gettz as _dateutil_gettz -else: - from dateutil.tz import gettz as _dateutil_gettz from dateutil.relativedelta import relativedelta from dateutil.parser import DEFAULTPARSER -from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo from pandas.compat import (parse_date, string_types, iteritems, StringIO, callable) @@ -108,11 +101,16 @@ iNaT = NPY_NAT from tslibs.timezones cimport ( - is_utc, is_tzlocal, + is_utc, is_tzlocal, _is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, - get_timezone, - get_utcoffset) -from tslibs.timezones import get_timezone, get_utcoffset # noqa + get_timezone, get_utcoffset, maybe_get_tz, + _get_dst_info + ) +from tslibs.timezones import ( # noqa + get_timezone, get_utcoffset, maybe_get_tz, + _p_tz_cache_key, dst_cache, + _unbox_utcoffsets + ) cdef inline object create_timestamp_from_ts( @@ -241,20 +239,6 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): return result -cdef inline bint _is_fixed_offset(object tz): - if treat_tz_as_dateutil(tz): - if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: - return 1 - else: - return 0 - elif treat_tz_as_pytz(tz): - if (len(tz._transition_info) == 0 - and len(tz._utc_transition_times) == 0): - return 1 - else: - return 0 - return 1 - _zero_time = datetime_time(0, 0) _no_input = object() @@ -1709,27 +1693,6 @@ def _localize_pydatetime(object dt, object tz): return dt.replace(tzinfo=tz) -cpdef inline object maybe_get_tz(object tz): - """ - (Maybe) Construct a timezone object from a string. If tz is a string, use - it to construct a timezone object. Otherwise, just return tz. - """ - if isinstance(tz, string_types): - if tz == 'tzlocal()': - tz = _dateutil_tzlocal() - elif tz.startswith('dateutil/'): - zone = tz[9:] - tz = _dateutil_gettz(zone) - # On Python 3 on Windows, the filename is not always set correctly. 
- if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: - tz._filename = zone - else: - tz = pytz.timezone(tz) - elif is_integer_object(tz): - tz = pytz.FixedOffset(tz / 60) - return tz - - class OutOfBoundsDatetime(ValueError): pass @@ -4237,141 +4200,6 @@ def tz_convert_single(int64_t val, object tz1, object tz2): offset = deltas[pos] return utc_date + offset -# Timezone data caches, key is the pytz string or dateutil file name. -dst_cache = {} - - -def _p_tz_cache_key(tz): - """ Python interface for cache function to facilitate testing.""" - return _tz_cache_key(tz) - - -cdef inline object _tz_cache_key(object tz): - """ - Return the key in the cache for the timezone info object or None - if unknown. - - The key is currently the tz string for pytz timezones, the filename for - dateutil timezones. - - Notes - ===== - This cannot just be the hash of a timezone object. Unfortunately, the - hashes of two dateutil tz objects which represent the same timezone are - not equal (even though the tz objects will compare equal and represent - the same tz file). Also, pytz objects are not always hashable so we use - str(tz) instead. - """ - if isinstance(tz, _pytz_BaseTzInfo): - return tz.zone - elif isinstance(tz, _dateutil_tzfile): - if '.tar.gz' in tz._filename: - raise ValueError('Bad tz filename. Dateutil on python 3 on ' - 'windows has a bug which causes tzfile._filename ' - 'to be the same for all timezone files. Please ' - 'construct dateutil timezones implicitly by ' - 'passing a string like "dateutil/Europe/London" ' - 'when you construct your pandas objects instead ' - 'of passing a timezone object. 
See ' - 'https://github.com/pandas-dev/pandas/pull/7362') - return 'dateutil' + tz._filename - else: - return None - - -cdef object _get_dst_info(object tz): - """ - return a tuple of : - (UTC times of DST transitions, - UTC offsets in microseconds corresponding to DST transitions, - string of type of transitions) - - """ - cache_key = _tz_cache_key(tz) - if cache_key is None: - num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 - return (np.array([NPY_NAT + 1], dtype=np.int64), - np.array([num], dtype=np.int64), - None) - - if cache_key not in dst_cache: - if treat_tz_as_pytz(tz): - trans = np.array(tz._utc_transition_times, dtype='M8[ns]') - trans = trans.view('i8') - try: - if tz._utc_transition_times[0].year == 1: - trans[0] = NPY_NAT + 1 - except Exception: - pass - deltas = _unbox_utcoffsets(tz._transition_info) - typ = 'pytz' - - elif treat_tz_as_dateutil(tz): - if len(tz._trans_list): - # get utc trans times - trans_list = _get_utc_trans_times_from_dateutil_tz(tz) - trans = np.hstack([ - np.array([0], dtype='M8[s]'), # place holder for first item - np.array(trans_list, dtype='M8[s]')]).astype( - 'M8[ns]') # all trans listed - trans = trans.view('i8') - trans[0] = NPY_NAT + 1 - - # deltas - deltas = np.array([v.offset for v in ( - tz._ttinfo_before,) + tz._trans_idx], dtype='i8') - deltas *= 1000000000 - typ = 'dateutil' - - elif _is_fixed_offset(tz): - trans = np.array([NPY_NAT + 1], dtype=np.int64) - deltas = np.array([tz._ttinfo_std.offset], - dtype='i8') * 1000000000 - typ = 'fixed' - else: - trans = np.array([], dtype='M8[ns]') - deltas = np.array([], dtype='i8') - typ = None - - else: - # static tzinfo - trans = np.array([NPY_NAT + 1], dtype=np.int64) - num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 - deltas = np.array([num], dtype=np.int64) - typ = 'static' - - dst_cache[cache_key] = (trans, deltas, typ) - - return dst_cache[cache_key] - -cdef object _get_utc_trans_times_from_dateutil_tz(object tz): - """ - Transition 
times in dateutil timezones are stored in local non-dst - time. This code converts them to UTC. It's the reverse of the code - in dateutil.tz.tzfile.__init__. - """ - new_trans = list(tz._trans_list) - last_std_offset = 0 - for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)): - if not tti.isdst: - last_std_offset = tti.offset - new_trans[i] = trans - last_std_offset - return new_trans - - -cpdef ndarray _unbox_utcoffsets(object transinfo): - cdef: - Py_ssize_t i, sz - ndarray[int64_t] arr - - sz = len(transinfo) - arr = np.empty(sz, dtype='i8') - - for i in range(sz): - arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 - - return arr - @cython.boundscheck(False) @cython.wraparound(False) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index ead5566440ca0..fac0018a78bc2 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- # cython: profile=False +from numpy cimport ndarray + cdef bint is_utc(object tz) cdef bint is_tzlocal(object tz) @@ -8,5 +10,9 @@ cdef bint treat_tz_as_pytz(object tz) cdef bint treat_tz_as_dateutil(object tz) cpdef object get_timezone(object tz) +cpdef object maybe_get_tz(object tz) cpdef get_utcoffset(tzinfo, obj) +cdef bint _is_fixed_offset(object tz) + +cdef object _get_dst_info(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 3db369a09ba2d..346da41e7073b 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,15 +1,40 @@ # -*- coding: utf-8 -*- # cython: profile=False +cimport cython +from cython cimport Py_ssize_t + # dateutil compat from dateutil.tz import ( - tzutc as _dateutil_tzutc, - tzlocal as _dateutil_tzlocal) + tzutc as _dateutil_tzutc, + tzlocal as _dateutil_tzlocal, + tzfile as _dateutil_tzfile) + +import sys +if sys.platform == 'win32' or sys.platform == 'cygwin': + # equiv pd.compat.is_platform_windows() + 
from dateutil.zoneinfo import gettz as _dateutil_gettz +else: + from dateutil.tz import gettz as _dateutil_gettz + +from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo import pytz UTC = pytz.utc +import numpy as np +cimport numpy as np +from numpy cimport ndarray, int64_t +np.import_array() + +# ---------------------------------------------------------------------- +from util cimport is_string_object, is_integer_object, get_nat + +cdef int64_t NPY_NAT = get_nat() + +# ---------------------------------------------------------------------- + cdef inline bint is_utc(object tz): return tz is UTC or isinstance(tz, _dateutil_tzutc) @@ -64,6 +89,70 @@ cpdef inline object get_timezone(object tz): except AttributeError: return tz + +cpdef inline object maybe_get_tz(object tz): + """ + (Maybe) Construct a timezone object from a string. If tz is a string, use + it to construct a timezone object. Otherwise, just return tz. + """ + if is_string_object(tz): + if tz == 'tzlocal()': + tz = _dateutil_tzlocal() + elif tz.startswith('dateutil/'): + zone = tz[9:] + tz = _dateutil_gettz(zone) + # On Python 3 on Windows, the filename is not always set correctly. + if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: + tz._filename = zone + else: + tz = pytz.timezone(tz) + elif is_integer_object(tz): + tz = pytz.FixedOffset(tz / 60) + return tz + + +def _p_tz_cache_key(tz): + """ Python interface for cache function to facilitate testing.""" + return _tz_cache_key(tz) + + +# Timezone data caches, key is the pytz string or dateutil file name. +dst_cache = {} + + +cdef inline object _tz_cache_key(object tz): + """ + Return the key in the cache for the timezone info object or None + if unknown. + + The key is currently the tz string for pytz timezones, the filename for + dateutil timezones. + + Notes + ===== + This cannot just be the hash of a timezone object. 
Unfortunately, the + hashes of two dateutil tz objects which represent the same timezone are + not equal (even though the tz objects will compare equal and represent + the same tz file). Also, pytz objects are not always hashable so we use + str(tz) instead. + """ + if isinstance(tz, _pytz_BaseTzInfo): + return tz.zone + elif isinstance(tz, _dateutil_tzfile): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on ' + 'windows has a bug which causes tzfile._filename ' + 'to be the same for all timezone files. Please ' + 'construct dateutil timezones implicitly by ' + 'passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil' + tz._filename + else: + return None + + #---------------------------------------------------------------------- # UTC Offsets @@ -72,3 +161,117 @@ cpdef get_utcoffset(tzinfo, obj): return tzinfo._utcoffset except AttributeError: return tzinfo.utcoffset(obj) + + +cdef inline bint _is_fixed_offset(object tz): + if treat_tz_as_dateutil(tz): + if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: + return 1 + else: + return 0 + elif treat_tz_as_pytz(tz): + if (len(tz._transition_info) == 0 + and len(tz._utc_transition_times) == 0): + return 1 + else: + return 0 + return 1 + + +cdef object _get_utc_trans_times_from_dateutil_tz(object tz): + """ + Transition times in dateutil timezones are stored in local non-dst + time. This code converts them to UTC. It's the reverse of the code + in dateutil.tz.tzfile.__init__. 
+ """ + new_trans = list(tz._trans_list) + last_std_offset = 0 + for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)): + if not tti.isdst: + last_std_offset = tti.offset + new_trans[i] = trans - last_std_offset + return new_trans + + +cpdef ndarray _unbox_utcoffsets(object transinfo): + cdef: + Py_ssize_t i, sz + ndarray[int64_t] arr + + sz = len(transinfo) + arr = np.empty(sz, dtype='i8') + + for i in range(sz): + arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 + + return arr + + +# ---------------------------------------------------------------------- +# Daylight Savings + + +cdef object _get_dst_info(object tz): + """ + return a tuple of : + (UTC times of DST transitions, + UTC offsets in microseconds corresponding to DST transitions, + string of type of transitions) + + """ + cache_key = _tz_cache_key(tz) + if cache_key is None: + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 + return (np.array([NPY_NAT + 1], dtype=np.int64), + np.array([num], dtype=np.int64), + None) + + if cache_key not in dst_cache: + if treat_tz_as_pytz(tz): + trans = np.array(tz._utc_transition_times, dtype='M8[ns]') + trans = trans.view('i8') + try: + if tz._utc_transition_times[0].year == 1: + trans[0] = NPY_NAT + 1 + except Exception: + pass + deltas = _unbox_utcoffsets(tz._transition_info) + typ = 'pytz' + + elif treat_tz_as_dateutil(tz): + if len(tz._trans_list): + # get utc trans times + trans_list = _get_utc_trans_times_from_dateutil_tz(tz) + trans = np.hstack([ + np.array([0], dtype='M8[s]'), # place holder for first item + np.array(trans_list, dtype='M8[s]')]).astype( + 'M8[ns]') # all trans listed + trans = trans.view('i8') + trans[0] = NPY_NAT + 1 + + # deltas + deltas = np.array([v.offset for v in ( + tz._ttinfo_before,) + tz._trans_idx], dtype='i8') + deltas *= 1000000000 + typ = 'dateutil' + + elif _is_fixed_offset(tz): + trans = np.array([NPY_NAT + 1], dtype=np.int64) + deltas = np.array([tz._ttinfo_std.offset], + dtype='i8') * 
1000000000 + typ = 'fixed' + else: + trans = np.array([], dtype='M8[ns]') + deltas = np.array([], dtype='i8') + typ = None + + else: + # static tzinfo + trans = np.array([NPY_NAT + 1], dtype=np.int64) + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 + deltas = np.array([num], dtype=np.int64) + typ = 'static' + + dst_cache[cache_key] = (trans, deltas, typ) + + return dst_cache[cache_key] From 698a06c965efcac01fa1dc320f4945c18e64428b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 13 Sep 2017 18:38:53 -0700 Subject: [PATCH 2/5] Fixup return value caused cython error --- pandas/_libs/period.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index eef3c571ef37a..82943a20172f5 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -252,7 +252,7 @@ def period_ordinal(int y, int m, int d, int h, int min, return get_period_ordinal(y, m, d, h, min, s, us, ps, freq) -cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) nogil: +cpdef period_ordinal_to_dt64(int64_t ordinal, int freq) nogil: cdef: pandas_datetimestruct dts date_info dinfo From 30c3fa6b718ddbf99e46e20b4692577265ec0bdc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 14 Sep 2017 08:26:24 -0700 Subject: [PATCH 3/5] Fix incorrectly removed return type --- pandas/_libs/period.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 82943a20172f5..eef3c571ef37a 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -252,7 +252,7 @@ def period_ordinal(int y, int m, int d, int h, int min, return get_period_ordinal(y, m, d, h, min, s, us, ps, freq) -cpdef period_ordinal_to_dt64(int64_t ordinal, int freq) nogil: +cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) nogil: cdef: pandas_datetimestruct dts date_info dinfo From 109d605ee9ed710e2b91d61ce90982615c02d621 Mon Sep 17 00:00:00 2001 From: Brock 
Mendel Date: Thu, 14 Sep 2017 09:21:14 -0700 Subject: [PATCH 4/5] Re-add name needed by tests --- pandas/_libs/tslib.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d6527702b01e0..ab6f392423bb6 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -109,7 +109,8 @@ from tslibs.timezones cimport ( from tslibs.timezones import ( # noqa get_timezone, get_utcoffset, maybe_get_tz, _p_tz_cache_key, dst_cache, - _unbox_utcoffsets + _unbox_utcoffsets, + _dateutil_gettz ) From 0147c24c52061f21b8d320b37ca031517a9e2e67 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 14 Sep 2017 19:27:39 -0700 Subject: [PATCH 5/5] dummy commit to force CI --- pandas/_libs/tslib.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ab6f392423bb6..ec12611ae7f02 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -21,8 +21,7 @@ from cpython cimport ( PyObject_RichCompare, Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE, PyUnicode_Check, - PyUnicode_AsUTF8String, -) + PyUnicode_AsUTF8String) cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object)