From b00126427af95456b03cf434a913c997593fe042 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Aug 2018 14:20:11 -0700 Subject: [PATCH 1/7] de-duplicate tz_convert functions, make tz_convert_dst less of a special case --- pandas/_libs/tslibs/conversion.pyx | 101 +++++++++++++++-------------- pandas/_libs/tslibs/resolution.pyx | 3 +- pandas/_libs/tslibs/strptime.pyx | 1 + 3 files changed, 53 insertions(+), 52 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a459b185fa48c..13ea9d9d922b7 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -612,7 +612,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz): # ---------------------------------------------------------------------- # Timezone Conversion -cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz, +cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, bint to_utc=True): """ tz_convert for non-UTC non-tzlocal cases where we have to check @@ -631,11 +631,10 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz, """ cdef: Py_ssize_t n = len(values) - Py_ssize_t i, j, pos + Py_ssize_t i, pos int64_t[:] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] tt, trans + ndarray[int64_t] trans int64_t[:] deltas - Py_ssize_t[:] posn int64_t v trans, deltas, typ = get_dst_info(tz) @@ -643,21 +642,15 @@ cdef inline int64_t[:] _tz_convert_dst(ndarray[int64_t] values, tzinfo tz, # We add `offset` below instead of subtracting it deltas = -1 * np.array(deltas, dtype='i8') - tt = values[values != NPY_NAT] - if not len(tt): - # if all NaT, return all NaT - return values - - posn = trans.searchsorted(tt, side='right') - - j = 0 for i in range(n): v = values[i] if v == NPY_NAT: result[i] = v else: - pos = posn[j] - 1 - j += 1 + # TODO: Is it more efficient to call searchsorted pointwise or + # on `values` outside the loop? 
We are not consistent about this. + # relative efficiency of pointwise increases with number of iNaTs + pos = trans.searchsorted(v, side='right') - 1 if pos < 0: raise ValueError('First time before start of DST info') result[i] = v - deltas[pos] @@ -734,7 +727,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): Py_ssize_t pos int64_t v, offset, utc_date npy_datetimestruct dts - ndarray[int64_t] arr # TODO: Is there a lighter-weight way to do this? + int64_t arr[1] # See GH#17734 We should always be converting either from UTC or to UTC assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC') @@ -746,7 +739,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): if is_tzlocal(tz1): utc_date = _tz_convert_tzlocal_utc(val, tz1, to_utc=True) elif get_timezone(tz1) != 'UTC': - arr = np.array([val]) + arr[0] = val utc_date = _tz_convert_dst(arr, tz1, to_utc=True)[0] else: utc_date = val @@ -757,7 +750,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): return _tz_convert_tzlocal_utc(utc_date, tz2, to_utc=False) else: # Convert UTC to other timezone - arr = np.array([utc_date]) + arr[0] = utc_date # Note: at least with cython 0.28.3, doing a lookup `[0]` in the next # line is sensitive to the declared return type of _tz_convert_dst; # if it is declared as returning ndarray[int64_t], a compile-time error @@ -765,9 +758,45 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): return _tz_convert_dst(arr, tz2, to_utc=False)[0] +cdef inline int64_t[:] _tz_convert(int64_t[:] vals, object tz, bint to_utc): + """ + Convert the given values (in i8) either to UTC or from UTC. 
+ + Parameters + ---------- + vals : int64 ndarray + tz : string / timezone object + to_utc : bint + + Returns + ------- + converted : ndarray[int64_t] + """ + cdef: + int64_t[:] converted, result + Py_ssize_t i, n = len(vals) + int64_t val + + if get_timezone(tz) != 'UTC': + converted = np.empty(n, dtype=np.int64) + if is_tzlocal(tz): + for i in range(n): + val = vals[i] + if val == NPY_NAT: + converted[i] = NPY_NAT + else: + converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc) + else: + converted = _tz_convert_dst(vals, tz, to_utc) + else: + converted = vals + + return converted + + @cython.boundscheck(False) @cython.wraparound(False) -def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): +def tz_convert(int64_t[:] vals, object tz1, object tz2): """ Convert the values (in i8) from timezone1 to timezone2 @@ -783,43 +812,15 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): """ cdef: - ndarray[int64_t] utc_dates, result - Py_ssize_t i, j, pos, n = len(vals) - int64_t v + int64_t[:] utc_dates, converted if len(vals) == 0: return np.array([], dtype=np.int64) # Convert to UTC - if get_timezone(tz1) != 'UTC': - utc_dates = np.empty(n, dtype=np.int64) - if is_tzlocal(tz1): - for i in range(n): - v = vals[i] - if v == NPY_NAT: - utc_dates[i] = NPY_NAT - else: - utc_dates[i] = _tz_convert_tzlocal_utc(v, tz1, to_utc=True) - else: - utc_dates = np.array(_tz_convert_dst(vals, tz1, to_utc=True)) - else: - utc_dates = vals - - if get_timezone(tz2) == 'UTC': - return utc_dates - - elif is_tzlocal(tz2): - result = np.zeros(n, dtype=np.int64) - for i in range(n): - v = utc_dates[i] - if v == NPY_NAT: - result[i] = NPY_NAT - else: - result[i] = _tz_convert_tzlocal_utc(v, tz2, to_utc=False) - return result - else: - # Convert UTC to other timezone - return np.array(_tz_convert_dst(utc_dates, tz2, to_utc=False)) + utc_dates = _tz_convert(vals, tz1, to_utc=True) + converted = _tz_convert(utc_dates, tz2, to_utc=False) + return np.array(converted, 
dtype=np.int64) # TODO: cdef scalar version to call from convert_str_to_tsobject diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 18cc21ccd59e0..fec6207978f30 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -11,8 +11,7 @@ from util cimport is_string_object, get_nat from np_datetime cimport npy_datetimestruct, dt64_to_dtstruct from frequencies cimport get_freq_code -from timezones cimport (is_utc, is_tzlocal, - maybe_get_tz, get_dst_info) +from timezones cimport is_utc, is_tzlocal, maybe_get_tz, get_dst_info from conversion cimport tz_convert_utc_to_tzlocal from ccalendar cimport get_days_in_month diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 59d673881bb40..7d0db69855761 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -622,6 +622,7 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year, days_to_week = week_0_length + (7 * (week_of_year - 1)) return 1 + days_to_week + day_of_week + cdef parse_timezone_directive(object z): """ Parse the '%z' directive and return a pytz.FixedOffset From ef8997021b6a8385bd4a9db8b37f552b5bccb879 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Aug 2018 14:25:52 -0700 Subject: [PATCH 2/7] clean up imports --- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/_libs/tslibs/resolution.pyx | 1 - pandas/_libs/tslibs/strptime.pyx | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 3ba2270a851d5..daaaf8d227798 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -5,7 +5,7 @@ cimport cython from cython cimport Py_ssize_t import time -from cpython.datetime cimport (PyDateTime_IMPORT, PyDateTime_CheckExact, +from cpython.datetime cimport (PyDateTime_IMPORT, datetime, timedelta, time as dt_time) PyDateTime_IMPORT diff --git 
a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index fec6207978f30..83be739a6ae0a 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False -cimport cython from cython cimport Py_ssize_t import numpy as np diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 7d0db69855761..8e7c55051a3c0 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -23,7 +23,6 @@ except: import pytz from cython cimport Py_ssize_t -from cpython cimport PyFloat_Check import numpy as np from numpy cimport int64_t From 327922e4a421aeb2919f8a5be4b2e2b1f49c134e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Aug 2018 14:26:22 -0700 Subject: [PATCH 3/7] remove cnp dep from frequencies --- pandas/_libs/tslibs/frequencies.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 5c8efa8c03712..7e27080848a35 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -2,10 +2,8 @@ # cython: profile=False import re -cimport numpy as cnp -cnp.import_array() - -from util cimport is_integer_object, is_string_object +from util cimport is_integer_object, is_string_object, import_array +import_array() from ccalendar import MONTH_NUMBERS From f368a2d6363eab1146e00512accca299adca5d4e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Aug 2018 14:28:08 -0700 Subject: [PATCH 4/7] revert out of scope --- pandas/_libs/tslibs/frequencies.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 7e27080848a35..5c8efa8c03712 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -2,8 +2,10 @@ # cython: profile=False import re -from util cimport 
is_integer_object, is_string_object, import_array -import_array() +cimport numpy as cnp +cnp.import_array() + +from util cimport is_integer_object, is_string_object from ccalendar import MONTH_NUMBERS From 1f2c7dab27ecaa72af8f6e8174cef020654e1186 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Aug 2018 14:30:45 -0700 Subject: [PATCH 5/7] Cleanup import --- pandas/_libs/tslibs/parsing.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index afda2046fd12d..61fb48c6913d3 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -6,7 +6,6 @@ Parsing functions for datetime and datetime-like strings. import sys import re -cimport cython from cython cimport Py_ssize_t From cec0db065988c43d022df81794dd458552874c41 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Aug 2018 20:51:36 -0700 Subject: [PATCH 6/7] dummy commit to force CI --- pandas/_libs/tslibs/conversion.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 13ea9d9d922b7..cd142a86c0859 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -810,7 +810,6 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2): ------- int64 ndarray of converted """ - cdef: int64_t[:] utc_dates, converted From 624e1a8427d0bde9200d548b7c2d30d37d9cd919 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 08:21:30 -0700 Subject: [PATCH 7/7] rename _tz_convert --> _tz_convert_one_way --- pandas/_libs/tslibs/conversion.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index cd142a86c0859..74a9823a85016 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -758,7 +758,8 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): return 
_tz_convert_dst(arr, tz2, to_utc=False)[0] -cdef inline int64_t[:] _tz_convert(int64_t[:] vals, object tz, bint to_utc): +cdef inline int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, + bint to_utc): """ Convert the given values (in i8) either to UTC or from UTC. @@ -817,8 +818,8 @@ def tz_convert(int64_t[:] vals, object tz1, object tz2): return np.array([], dtype=np.int64) # Convert to UTC - utc_dates = _tz_convert(vals, tz1, to_utc=True) - converted = _tz_convert(utc_dates, tz2, to_utc=False) + utc_dates = _tz_convert_one_way(vals, tz1, to_utc=True) + converted = _tz_convert_one_way(utc_dates, tz2, to_utc=False) return np.array(converted, dtype=np.int64)