From 4b8c4fbd9109b2c5770ad01e6909a2d766b43e8c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Jun 2020 14:41:49 -0700 Subject: [PATCH 01/38] REF: implement TZConvertInfo --- asv_bench/benchmarks/tslibs/resolution.py | 50 +++++++++++++++++ pandas/_libs/tslibs/resolution.pyx | 66 ++++++++--------------- pandas/_libs/tslibs/timezones.pxd | 13 +++++ pandas/_libs/tslibs/timezones.pyx | 40 ++++++++++++-- 4 files changed, 123 insertions(+), 46 deletions(-) create mode 100644 asv_bench/benchmarks/tslibs/resolution.py diff --git a/asv_bench/benchmarks/tslibs/resolution.py b/asv_bench/benchmarks/tslibs/resolution.py new file mode 100644 index 0000000000000..bce13a6715058 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/resolution.py @@ -0,0 +1,50 @@ +""" +ipython analogue: + +tr = TimeResolution() +mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],)) +df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"]) + +for unit in tr.params[0]: + for size in tr.params[1]: + for tz in tr.params[2]: + tr.setup_cache(unit, size, tz) + key = (unit, size, str(tz)) + print(key) + + val = %timeit -o tr.time_get_resolution(unit, size, tz) + + df.loc[key] = (val.average, val.stdev) + +""" +from datetime import timedelta, timezone + +from dateutil.tz import gettz, tzlocal +import numpy as np +import pytz + +from pandas._libs.tslibs.resolution import get_resolution + + +class TimeResolution: + params = ( + ["D", "h", "m", "s", "us", "ns"], + [1, 100, 10 ** 4, 10 ** 6], + [ + None, + timezone.utc, + timezone(timedelta(minutes=60)), + pytz.timezone("US/Pacific"), + gettz("Asia/Tokyo"), + tzlocal(), + ], + ) + param_names = ["unit", "size", "tz"] + + def setup_cache(self, unit, size, tz): + arr = np.random.randint(0, 10, size=size, dtype="i8") + arr = arr.view(f"M8[{unit}]").astype("M8[ns]").view("i8") + self.i8data = arr + + def time_get_resolution(self, unit, size, tz): + get_resolution(self.i8data, tz) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4dbecc76ad986..85d5a2e0e9096 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -8,7 +8,10 @@ from pandas._libs.tslibs.dtypes import Resolution from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dt64_to_dtstruct) from pandas._libs.tslibs.timezones cimport ( - is_utc, is_tzlocal, maybe_get_tz, get_dst_info) + maybe_get_tz, + get_tzconverter, + TZConvertInfo, +) from pandas._libs.tslibs.ccalendar cimport get_days_in_month from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal @@ -38,54 +41,31 @@ def get_resolution(const int64_t[:] stamps, tz=None): Py_ssize_t i, n = len(stamps) npy_datetimestruct dts int reso = RESO_DAY, curr_reso - ndarray[int64_t] trans - int64_t[:] deltas - Py_ssize_t[:] pos - int64_t local_val, delta + int64_t local_val + TZConvertInfo info if tz is not None: tz = maybe_get_tz(tz) - if is_utc(tz) or tz is None: - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i], &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - elif is_tzlocal(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - continue + info = get_tzconverter(tz, stamps) + + for i in range(n): + if stamps[i] == NPY_NAT: + continue + + if info.use_utc: + local_val = stamps[i] + elif info.use_tzlocal: local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) - dt64_to_dtstruct(local_val, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) - - if typ not in ['pytz', 'dateutil']: - # static/fixed; in this case we know that len(delta) == 1 - delta = deltas[0] - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i] + delta, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso + elif info.use_fixed: + local_val = stamps[i] + info.delta else: - pos = trans.searchsorted(stamps, side='right') - 1 - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso + local_val = stamps[i] + info.utcoffsets[info.positions[i]] + + dt64_to_dtstruct(local_val, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso return Resolution(reso) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 14c0523787422..9b2f835f3d3fb 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,5 +1,7 @@ from cpython.datetime cimport tzinfo +from numpy cimport int64_t, intp_t, ndarray + cdef tzinfo utc_pytz cpdef bint is_utc(object tz) @@ -15,3 +17,14 @@ cdef get_utcoffset(tzinfo tz, obj) cdef bint is_fixed_offset(tzinfo tz) cdef object get_dst_info(object tz) + + +ctypedef struct TZConvertInfo: + bint use_utc + bint use_tzlocal + bint use_fixed + int64_t* utcoffsets + intp_t* positions + int64_t delta + +cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 7fbb50fcbfd41..b9f1bec4e783f 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -17,7 +17,7 @@ UTC = pytz.utc import numpy as np cimport numpy as cnp -from numpy cimport int64_t +from numpy cimport int64_t, intp_t cnp.import_array() # ---------------------------------------------------------------------- @@ -192,10 +192,10 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz): return new_trans -cdef int64_t[:] unbox_utcoffsets(object transinfo): +cdef ndarray[int64_t, ndim=1] unbox_utcoffsets(object transinfo): cdef: Py_ssize_t i, sz - int64_t[:] arr + ndarray[int64_t, ndim=1] arr sz = len(transinfo) arr = np.empty(sz, dtype='i8') @@ -209,6 +209,40 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo): # ---------------------------------------------------------------------- # Daylight Savings +ctypedef struct TZConvertInfo: + bint use_utc + bint use_tzlocal + bint use_fixed + int64_t* utcoffsets + intp_t* positions + int64_t delta + + +cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): + cdef: + TZConvertInfo info + ndarray[int64_t, ndim=1] deltas, trans, pos + str typ + + info.use_utc = info.use_tzlocal = info.use_fixed = False + + if tz is None or is_utc(tz): + info.use_utc = True + elif is_tzlocal(tz): + info.use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + if typ not in ["pytz", "dateutil"]: + # Fixed Offset + info.use_fixed = True + info.delta = deltas[0] + else: + info.utcoffsets = cnp.PyArray_DATA(deltas) + pos = trans.searchsorted(values, side="right") - 1 + info.positions = cnp.PyArray_DATA(pos) + + return info + cdef object get_dst_info(object tz): """ From 8114413b372840d95742c6ac5bd8eb48b2fe6fe6 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Jun 2020 18:35:37 -0700 Subject: [PATCH 02/38] setup_cache->setup --- asv_bench/asv.conf.json | 2 +- asv_bench/benchmarks/tslibs/resolution.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 7c10a2d17775a..962ec6f50808f 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["3.6"], + "pythons": ["3.7"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty diff --git a/asv_bench/benchmarks/tslibs/resolution.py b/asv_bench/benchmarks/tslibs/resolution.py index bce13a6715058..274aa1ad6d4a9 100644 --- a/asv_bench/benchmarks/tslibs/resolution.py +++ b/asv_bench/benchmarks/tslibs/resolution.py @@ -8,7 +8,7 @@ for unit in tr.params[0]: for size in tr.params[1]: for tz in tr.params[2]: - tr.setup_cache(unit, size, tz) + tr.setup(unit, size, tz) key = (unit, size, str(tz)) print(key) @@ -41,7 +41,7 @@ class TimeResolution: ) param_names = ["unit", "size", "tz"] - def setup_cache(self, unit, size, tz): + def setup(self, unit, size, tz): arr = np.random.randint(0, 10, size=size, dtype="i8") arr = arr.view(f"M8[{unit}]").astype("M8[ns]").view("i8") self.i8data = arr From 907f9c4b148be5d4f7856fa5dad167f048ca011c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 30 Jun 2020 19:12:25 -0700 Subject: [PATCH 03/38] revert --- asv_bench/asv.conf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 962ec6f50808f..7c10a2d17775a 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -26,7 +26,7 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.4"], - "pythons": ["3.7"], + "pythons": ["3.6"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty From bb7e60da1e0932208078277f60970b35bb4a5136 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 12:02:29 -0700 Subject: [PATCH 04/38] ensure initialized --- pandas/_libs/tslibs/timezones.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 66023d2f45165..f8fc7e3d0f2d1 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -228,6 +228,7 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): str typ info.use_utc = info.use_tzlocal = info.use_fixed = False + info.delta = NPY_NAT # placeholder if tz is None or is_utc(tz): info.use_utc = True From 407f266d12e892f38c51b5d6d2d779b8aa586a72 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 12:16:34 -0700 Subject: [PATCH 05/38] ensure initialized --- pandas/_libs/tslibs/timezones.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index f8fc7e3d0f2d1..a1260b74a9dec 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -229,6 +229,8 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): info.use_utc = info.use_tzlocal = info.use_fixed = False info.delta = NPY_NAT # placeholder + info.utcoffsets = NULL + info.positions = NULL if tz is None or is_utc(tz): info.use_utc = True From 5adca2119a0f04f7e4867ee5860e7578b3681ee9 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:22:58 -0700 Subject: [PATCH 06/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 466681a17bac4..078272b3e3542 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -43,6 +43,12 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): info = get_tzconverter(tz, stamps) + if info.use_fixed: + assert info.delta != NPY_NAT + elif not info.use_utc and not info.use_tzlocal: + assert info.utcoffsets is not NULL + assert info.positions is not NULL + for i in range(n): if stamps[i] == NPY_NAT: continue From 69bfb80825515ffc3db9fd1c53f6489ce74611ba Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:23:27 -0700 Subject: [PATCH 07/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 078272b3e3542..b647a424ce229 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -41,6 +41,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): int64_t local_val TZConvertInfo info + assert False info = get_tzconverter(tz, stamps) if info.use_fixed: From 13244c882aee71678d0d29aa683db97e7cbd700d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:23:58 -0700 Subject: [PATCH 08/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index b647a424ce229..5e20b139c136f 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -41,7 +41,6 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): int64_t local_val TZConvertInfo info - assert False info = get_tzconverter(tz, stamps) if info.use_fixed: @@ -50,6 +49,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): assert info.utcoffsets is not NULL assert info.positions is not NULL + assert False for i in range(n): if stamps[i] == NPY_NAT: continue From c0f8b34536c869de57484f674962cb46b8e694ac Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:25:25 -0700 Subject: [PATCH 09/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 5e20b139c136f..26896b47e1cd3 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -49,7 +49,6 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): assert info.utcoffsets is not NULL assert info.positions is not NULL - assert False for i in range(n): if stamps[i] == NPY_NAT: continue @@ -61,6 +60,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif info.use_fixed: local_val = stamps[i] + info.delta else: + assert False local_val = stamps[i] + info.utcoffsets[info.positions[i]] dt64_to_dtstruct(local_val, &dts) From 862090522996726579a577192c0b6b998ff58fbc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:27:32 -0700 Subject: [PATCH 10/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 26896b47e1cd3..480838746f27f 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -60,6 +60,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif info.use_fixed: local_val = stamps[i] + info.delta else: + pos = info.positions[i] assert False local_val = stamps[i] + info.utcoffsets[info.positions[i]] From 3605695348249a99f9c219a8145e87c053593c91 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:28:41 -0700 Subject: [PATCH 11/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 480838746f27f..f6ea2483807aa 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -38,8 +38,9 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): Py_ssize_t i, n = len(stamps) npy_datetimestruct dts int reso = RESO_DAY, curr_reso - int64_t local_val + int64_t local_val, delta TZConvertInfo info + intp_t pos info = get_tzconverter(tz, stamps) @@ -61,8 +62,9 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): local_val = stamps[i] + info.delta else: pos = info.positions[i] + delta = info.utcoffsets[pos] assert False - local_val = stamps[i] + info.utcoffsets[info.positions[i]] + local_val = stamps[i] + delta dt64_to_dtstruct(local_val, &dts) curr_reso = _reso_stamp(&dts) From a0eb7879116275052d8d6d1b38d4a62f71960697 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:29:07 -0700 Subject: [PATCH 12/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index f6ea2483807aa..e32ddca58ad5b 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,7 +1,7 @@ from cpython.datetime cimport tzinfo import numpy as np -from numpy cimport ndarray, int64_t, int32_t +from numpy cimport ndarray, int64_t, int32_t, intp_t from pandas._libs.tslibs.util cimport get_nat From c8fcc19cb63d39199cde312cd8e481d210a32c94 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:30:07 -0700 Subject: [PATCH 13/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index e32ddca58ad5b..1d37aeeec57fd 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -62,6 +62,8 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): local_val = stamps[i] + info.delta else: pos = info.positions[i] + if pos == -1: + assert False delta = info.utcoffsets[pos] assert False local_val = stamps[i] + delta From db71af589d3a1f2901f6abfaa1b62866ac5fae27 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:31:40 -0700 Subject: [PATCH 14/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 1 + pandas/_libs/tslibs/timezones.pyx | 3 +++ 2 files changed, 4 insertions(+) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 1d37aeeec57fd..7467eda52eed1 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -62,6 +62,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): local_val = stamps[i] + info.delta else: pos = info.positions[i] + assert info.noffsets > pos, (info.noffsets, pos) if pos == -1: assert False delta = info.utcoffsets[pos] diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index a1260b74a9dec..02e033e5f8f26 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -219,6 +219,7 @@ ctypedef struct TZConvertInfo: int64_t* utcoffsets intp_t* positions int64_t delta + int noffsets cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): @@ -231,6 +232,7 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): info.delta = NPY_NAT # placeholder info.utcoffsets = NULL info.positions = NULL + info.noffsets = 0 if tz is None or is_utc(tz): info.use_utc = True @@ -238,6 +240,7 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): info.use_tzlocal = True else: trans, deltas, typ = get_dst_info(tz) + info.noffsets = len(deltas) if typ not in ["pytz", "dateutil"]: # Fixed Offset info.use_fixed = True From e47e490987e91e87373ff4d0dcc2d9f49d9d6834 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:31:58 -0700 Subject: [PATCH 15/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pxd | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 4bf6fafbfe1b3..5fdf1c9b0aac6 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -26,5 +26,6 @@ ctypedef struct TZConvertInfo: int64_t* utcoffsets intp_t* positions int64_t delta + int noffsets cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values) From 7f8c717a426c441e0405aff675cdde0473b240d4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:35:41 -0700 Subject: [PATCH 16/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 02e033e5f8f26..a585ad3ff47ac 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -248,6 +248,8 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): else: info.utcoffsets = cnp.PyArray_DATA(deltas) pos = trans.searchsorted(values, side="right") - 1 + + assert (pos < len(deltas)).all(), (max(pos), len(deltas)) info.positions = cnp.PyArray_DATA(pos) return info From e21cd6ade4e757b1180b4298fb629f3f926d231d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:36:42 -0700 Subject: [PATCH 17/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index a585ad3ff47ac..5c8ad477b9418 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -226,6 +226,7 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): cdef: TZConvertInfo info ndarray[int64_t, ndim=1] deltas, trans, pos + ndarray[intp_t, ndim=1] pos str typ info.use_utc = info.use_tzlocal = info.use_fixed = False From 9a470965854182964459010b32df4dea458f6ec3 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:36:59 -0700 Subject: [PATCH 18/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 5c8ad477b9418..f8457faf03249 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -225,7 +225,7 @@ ctypedef struct TZConvertInfo: cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): cdef: TZConvertInfo info - ndarray[int64_t, ndim=1] deltas, trans, pos + ndarray[int64_t, ndim=1] deltas, trans ndarray[intp_t, ndim=1] pos str typ From d6dce1ad9777bcf4edc756318c7b616fcd4b8e54 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:38:56 -0700 Subject: [PATCH 19/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index f8457faf03249..68933dbc1510c 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -253,6 +253,10 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): assert (pos < len(deltas)).all(), (max(pos), len(deltas)) info.positions = cnp.PyArray_DATA(pos) + for i in range(len(values)): + p = info.positions[i] + assert p < info.noffsets, (p, info.noffsets) + return info From 0dbd8ac14a0adcceabaeb6e2bd75c8127448d25d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 16:41:18 -0700 Subject: [PATCH 20/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 7467eda52eed1..99a594a69f900 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -41,6 +41,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): int64_t local_val, delta TZConvertInfo info intp_t pos + ndarray[intp_t, ndim=1] pos2 info = get_tzconverter(tz, stamps) @@ -49,6 +50,9 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif not info.use_utc and not info.use_tzlocal: assert info.utcoffsets is not NULL assert info.positions is not NULL + pos2 = info.positions + assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) + for i in range(n): if stamps[i] == NPY_NAT: From f9514b4896211956c57f08e4875f6b631fcbae34 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:39:19 -0700 Subject: [PATCH 21/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 99a594a69f900..4c6d6ac70b8b3 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -50,10 +50,9 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif not info.use_utc and not info.use_tzlocal: assert info.utcoffsets is not NULL assert info.positions is not NULL - pos2 = info.positions + pos2 = np.array(info.positions) assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) - for i in range(n): if stamps[i] == NPY_NAT: continue From e198dbd301b7463e14fcefed157797bc66c8258c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:40:42 -0700 Subject: [PATCH 22/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4c6d6ac70b8b3..a5ca6515fbff5 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -69,7 +69,6 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): if pos == -1: assert False delta = info.utcoffsets[pos] - assert False local_val = stamps[i] + delta dt64_to_dtstruct(local_val, &dts) From 681f5b85484b47028b17c74b50699959d7393b84 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:49:33 -0700 Subject: [PATCH 23/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index a5ca6515fbff5..5ae8695c8e271 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -50,7 +50,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif not info.use_utc and not info.use_tzlocal: assert info.utcoffsets is not NULL assert info.positions is not NULL - pos2 = np.array(info.positions) + pos2 = np.array(info.positions) assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): From e93b96131dac8e92d2f9b0497110472a50ecb7e6 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:51:31 -0700 Subject: [PATCH 24/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 5ae8695c8e271..58c691194270c 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -50,7 +50,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif not info.use_utc and not info.use_tzlocal: assert info.utcoffsets is not NULL assert info.positions is not NULL - pos2 = np.array(info.positions) + pos2 = np.array(info.positions, dtype=np.intp) assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): From 52af5e18353a585ca0abbd7574926667898eb68c Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:53:24 -0700 Subject: [PATCH 25/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 68933dbc1510c..95c3ed09cd8ff 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -249,6 +249,8 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): else: info.utcoffsets = cnp.PyArray_DATA(deltas) pos = trans.searchsorted(values, side="right") - 1 + assert pos.flags["F_CONTIGUOUS"] + assert pos.flags["C_CONTIGUOUS"] assert (pos < len(deltas)).all(), (max(pos), len(deltas)) info.positions = cnp.PyArray_DATA(pos) From 26d0d3aed224e4ba4e6561e85af0aef14c5794f2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:55:34 -0700 Subject: [PATCH 26/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 95c3ed09cd8ff..4abaac9aa0f6d 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -254,6 +254,9 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): assert (pos < len(deltas)).all(), (max(pos), len(deltas)) info.positions = cnp.PyArray_DATA(pos) + + pos2 = np.array(info.positions, dtype=np.intp) + assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(len(values)): p = info.positions[i] From 3b220fb7b23f18a9b1eca535189c68912dd3ae44 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:56:01 -0700 Subject: [PATCH 27/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 4abaac9aa0f6d..e5ce321530587 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -228,6 +228,7 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): ndarray[int64_t, ndim=1] deltas, trans ndarray[intp_t, ndim=1] pos str typ + Py_ssize_t n = len(values) info.use_utc = info.use_tzlocal = info.use_fixed = False info.delta = NPY_NAT # placeholder From 3c1bf60f46cd6961bdc67347907dd6e4f2e826c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 17:59:48 -0700 Subject: [PATCH 28/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 8 ++++++++ pandas/_libs/tslibs/timezones.pxd | 1 + pandas/_libs/tslibs/timezones.pyx | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 58c691194270c..cff7e4461892a 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -51,6 +51,14 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): assert info.utcoffsets is not NULL assert info.positions is not NULL pos2 = np.array(info.positions, dtype=np.intp) + + for i in range(n): + v1 = info.positions[i] + v2 = info.positions2[i] + assert v1 == v2, (v1, v2) + v3 = pos2[i] + assert v3 == v2, (v3, v2) + assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 5fdf1c9b0aac6..680802444eff6 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -25,6 +25,7 @@ ctypedef struct TZConvertInfo: bint use_fixed int64_t* utcoffsets intp_t* positions + intp_t[:] positions2 int64_t delta int noffsets diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index e5ce321530587..8375afe514788 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -218,6 +218,7 @@ ctypedef struct TZConvertInfo: bint use_fixed int64_t* utcoffsets intp_t* positions + intp_t[:] positions2 int64_t delta int noffsets @@ -255,12 +256,14 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): assert (pos < len(deltas)).all(), (max(pos), len(deltas)) info.positions = cnp.PyArray_DATA(pos) - + info.positions2 = pos + pos2 = np.array(info.positions, dtype=np.intp) assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(len(values)): p = info.positions[i] + assert p == info.positions2[i] assert p < info.noffsets, (p, info.noffsets) return info From a70ce3fc87f8d4bfba89b4ac72b4a9de36aa5b5a Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:00:41 -0700 Subject: [PATCH 29/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 680802444eff6..fa7742b861185 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -25,7 +25,7 @@ ctypedef struct TZConvertInfo: bint use_fixed int64_t* utcoffsets intp_t* positions - intp_t[:] positions2 + ndarray[intp_t] positions2 int64_t delta int noffsets diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 8375afe514788..901a205654b83 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -218,7 +218,7 @@ ctypedef struct TZConvertInfo: bint use_fixed int64_t* utcoffsets intp_t* positions - intp_t[:] positions2 + ndarray[intp_t] positions2 int64_t delta int noffsets From 2478ec31c6c5fb25cc0fa178b74038c9a4c4f2c2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:01:40 -0700 Subject: [PATCH 30/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 4 +--- pandas/_libs/tslibs/timezones.pxd | 1 - pandas/_libs/tslibs/timezones.pyx | 3 --- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index cff7e4461892a..cf1680f5ae618 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -54,10 +54,8 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): for i in range(n): v1 = info.positions[i] - v2 = info.positions2[i] + v2 = pos2[i] assert v1 == v2, (v1, v2) - v3 = pos2[i] - assert v3 == v2, (v3, v2) assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index fa7742b861185..5fdf1c9b0aac6 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -25,7 +25,6 @@ ctypedef struct TZConvertInfo: bint use_fixed int64_t* utcoffsets intp_t* positions - ndarray[intp_t] positions2 int64_t delta int noffsets diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 901a205654b83..e94aeb47fe1e1 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -218,7 +218,6 @@ ctypedef struct TZConvertInfo: bint use_fixed int64_t* utcoffsets intp_t* positions - ndarray[intp_t] positions2 int64_t delta int noffsets @@ -256,14 +255,12 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): assert (pos < len(deltas)).all(), (max(pos), len(deltas)) info.positions = cnp.PyArray_DATA(pos) - info.positions2 = pos pos2 = np.array(info.positions, dtype=np.intp) assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(len(values)): p = info.positions[i] - assert p == info.positions2[i] assert p < info.noffsets, (p, info.noffsets) return info From 21cbfc2ceb10a4fc524505f6c9fee1a64fa0b4c6 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:03:33 -0700 Subject: [PATCH 31/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index cf1680f5ae618..2eec1cf8d8b03 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -56,8 +56,8 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): v1 = info.positions[i] v2 = pos2[i] assert v1 == v2, (v1, v2) - - assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) + assert v1 < info.noffsets, (v1, info.noffsets) + #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): if stamps[i] == NPY_NAT: From 2aa256aa0f63fd253f2018d25031dcffb8ad50b0 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:05:51 -0700 Subject: [PATCH 32/38] debuggina ssertions --- pandas/_libs/tslibs/timezones.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index e94aeb47fe1e1..5fa9435e6ed08 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -253,13 +253,14 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): assert pos.flags["F_CONTIGUOUS"] assert pos.flags["C_CONTIGUOUS"] - assert (pos < len(deltas)).all(), (max(pos), len(deltas)) + assert (pos.max() < info.noffsets), (pos.max(), info.noffsets) + assert (pos < info.noffsets).all(), (max(pos), info.noffsets) info.positions = cnp.PyArray_DATA(pos) pos2 = np.array(info.positions, dtype=np.intp) assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) - for i in range(len(values)): + for i in range(n): p = info.positions[i] assert p < info.noffsets, (p, info.noffsets) From 7669dc26414afd819c53582c84e1de473240bef5 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:06:58 -0700 Subject: [PATCH 33/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 2eec1cf8d8b03..bf3c67a03ab57 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -56,7 +56,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): v1 = info.positions[i] v2 = pos2[i] assert v1 == v2, (v1, v2) - assert v1 < info.noffsets, (v1, info.noffsets) + assert v1 < info.noffsets, (v1, info.noffsets, i, values[i]) #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): From 2bfb9dfc87312c7b796d683d78a6563600830cbc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:07:11 -0700 Subject: [PATCH 34/38] debuggina ssertions --- pandas/_libs/tslibs/resolution.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index bf3c67a03ab57..74f44566452d6 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -56,7 +56,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): v1 = info.positions[i] v2 = pos2[i] assert v1 == v2, (v1, v2) - assert v1 < info.noffsets, (v1, info.noffsets, i, values[i]) + assert v1 < info.noffsets, (v1, info.noffsets, i, stamps[i]) #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): From 3dd295739a72ca1b242ce21e3836b0fd261199f7 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:47:27 -0700 Subject: [PATCH 35/38] CLN --- pandas/_libs/tslibs/resolution.pyx | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 74f44566452d6..6cf792e89a0ba 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -38,9 +38,8 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): Py_ssize_t i, n = len(stamps) npy_datetimestruct dts int reso = RESO_DAY, curr_reso - int64_t local_val, delta + int64_t local_val TZConvertInfo info - intp_t pos ndarray[intp_t, ndim=1] pos2 info = get_tzconverter(tz, stamps) @@ -51,13 +50,12 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): assert info.utcoffsets is not NULL assert info.positions is not NULL pos2 = np.array(info.positions, dtype=np.intp) - for i in range(n): v1 = info.positions[i] v2 = pos2[i] assert v1 == v2, (v1, v2) assert v1 < info.noffsets, (v1, info.noffsets, i, stamps[i]) - #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) + assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): if stamps[i] == NPY_NAT: @@ -70,12 +68,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif info.use_fixed: local_val = stamps[i] + info.delta else: - pos = info.positions[i] - assert info.noffsets > pos, (info.noffsets, pos) - if pos == -1: - assert False - delta = info.utcoffsets[pos] - local_val = stamps[i] + delta + local_val = stamps[i] + info.utcoffsets[info.positions[i]] dt64_to_dtstruct(local_val, &dts) curr_reso = _reso_stamp(&dts) From b31c40e67ae1be0e58324276758e6394a8a3857b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Jul 2020 18:53:18 -0700 Subject: [PATCH 36/38] debugging assertions --- pandas/_libs/tslibs/resolution.pyx | 10 +++++----- pandas/_libs/tslibs/timezones.pxd | 4 ++-- pandas/_libs/tslibs/timezones.pyx | 14 ++++++++------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 6cf792e89a0ba..615e13277c29a 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -40,7 +40,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): int reso = RESO_DAY, curr_reso int64_t local_val TZConvertInfo info - ndarray[intp_t, ndim=1] pos2 + #ndarray[intp_t, ndim=1] pos2 info = get_tzconverter(tz, stamps) @@ -49,13 +49,13 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): elif not info.use_utc and not info.use_tzlocal: assert info.utcoffsets is not NULL assert info.positions is not NULL - pos2 = np.array(info.positions, dtype=np.intp) + #pos2 = np.array(info.positions, dtype=np.intp) for i in range(n): v1 = info.positions[i] - v2 = pos2[i] - assert v1 == v2, (v1, v2) + #v2 = pos2[i] + #assert v1 == v2, (v1, v2) assert v1 < info.noffsets, (v1, info.noffsets, i, stamps[i]) - assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) + #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): if stamps[i] == NPY_NAT: diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 5fdf1c9b0aac6..40226011a2a18 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -23,8 +23,8 @@ ctypedef struct TZConvertInfo: bint use_utc bint use_tzlocal bint use_fixed - int64_t* utcoffsets - intp_t* positions + ndarray[int64_t, ndim=1]* utcoffsets + ndarray[intp_t, ndim=1]* positions int64_t delta int noffsets diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 5fa9435e6ed08..50221b0d9b427 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -216,8 +216,8 @@ ctypedef struct TZConvertInfo: bint use_utc bint use_tzlocal bint use_fixed - int64_t* utcoffsets - intp_t* positions + ndarray[int64_t, ndim=1]* utcoffsets + ndarray[intp_t, ndim=1]* positions int64_t delta int noffsets @@ -248,17 +248,19 @@ cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): info.use_fixed = True info.delta = deltas[0] else: - info.utcoffsets = cnp.PyArray_DATA(deltas) + info.utcoffsets = deltas + #info.utcoffsets = cnp.PyArray_DATA(deltas) pos = trans.searchsorted(values, side="right") - 1 assert pos.flags["F_CONTIGUOUS"] assert pos.flags["C_CONTIGUOUS"] assert (pos.max() < info.noffsets), (pos.max(), info.noffsets) assert (pos < info.noffsets).all(), (max(pos), info.noffsets) - info.positions = cnp.PyArray_DATA(pos) + #info.positions = cnp.PyArray_DATA(pos) + info.positions = pos - pos2 = np.array(info.positions, dtype=np.intp) - assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) + #pos2 = np.array(info.positions, dtype=np.intp) + #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) for i in range(n): p = info.positions[i] From e5b73c78454744eff50904acb00927065837a416 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Jul 2020 12:53:03 -0700 Subject: [PATCH 37/38] Implement TZ --- pandas/_libs/tslibs/resolution.pyx | 29 +------- pandas/_libs/tslibs/timezones.pxd | 21 +++--- pandas/_libs/tslibs/timezones.pyx | 106 ++++++++++++++--------------- 3 files changed, 66 insertions(+), 90 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 615e13277c29a..ec86837bd0639 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -8,7 +8,7 @@ from pandas._libs.tslibs.util cimport get_nat from pandas._libs.tslibs.dtypes import Resolution from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dt64_to_dtstruct) -from pandas._libs.tslibs.timezones cimport get_tzconverter, TZConvertInfo +from pandas._libs.tslibs.timezones cimport TZ from pandas._libs.tslibs.ccalendar cimport get_days_in_month from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal @@ -39,36 +39,13 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): npy_datetimestruct dts int reso = RESO_DAY, curr_reso int64_t local_val - TZConvertInfo info - #ndarray[intp_t, ndim=1] pos2 - - info = get_tzconverter(tz, stamps) - - if info.use_fixed: - assert info.delta != NPY_NAT - elif not info.use_utc and not info.use_tzlocal: - assert info.utcoffsets is not NULL - assert info.positions is not NULL - #pos2 = np.array(info.positions, dtype=np.intp) - for i in range(n): - v1 = info.positions[i] - #v2 = pos2[i] - #assert v1 == v2, (v1, v2) - assert v1 < info.noffsets, (v1, info.noffsets, i, stamps[i]) - #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) + TZ localizer = TZ(tz, stamps) for i in range(n): if stamps[i] == NPY_NAT: continue - if info.use_utc: - local_val = stamps[i] - elif info.use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) - elif info.use_fixed: - local_val = stamps[i] + info.delta - else: - local_val = stamps[i] + info.utcoffsets[info.positions[i]] + local_val = localizer.get_local_timestamp(stamps[i], i) dt64_to_dtstruct(local_val, &dts) curr_reso = _reso_stamp(&dts) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 69b567465d7eb..450976492d1eb 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -19,13 +19,14 @@ cdef bint is_fixed_offset(tzinfo tz) cdef object get_dst_info(tzinfo tz) -ctypedef struct TZConvertInfo: - bint use_utc - bint use_tzlocal - bint use_fixed - ndarray[int64_t, ndim=1]* utcoffsets - ndarray[intp_t, ndim=1]* positions - int64_t delta - int noffsets - -cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values) +cdef class TZ: + cdef: + bint use_utc, use_tzlocal, use_fixed, use_pytz + int noffsets + int64_t* utcoffsets + intp_t* positions + ndarray positions_arr # needed to avoid segfault + int64_t delta + tzinfo tz + + cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index b46f2997d7998..9e39a92a83baf 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -17,7 +17,7 @@ UTC = pytz.utc import numpy as np cimport numpy as cnp -from numpy cimport int64_t, intp_t +from numpy cimport int64_t, intp_t, ndarray cnp.import_array() # ---------------------------------------------------------------------- @@ -209,61 +209,59 @@ cdef ndarray[int64_t, ndim=1] unbox_utcoffsets(object transinfo): # ---------------------------------------------------------------------- # Daylight Savings -ctypedef struct TZConvertInfo: - bint use_utc - bint use_tzlocal - bint use_fixed - ndarray[int64_t, ndim=1]* utcoffsets - ndarray[intp_t, ndim=1]* positions - int64_t delta - int noffsets - - -cdef TZConvertInfo get_tzconverter(tzinfo tz, const int64_t[:] values): +cdef class TZ: cdef: - TZConvertInfo info - ndarray[int64_t, ndim=1] deltas, trans - ndarray[intp_t, ndim=1] pos - str typ - Py_ssize_t n = len(values) - - info.use_utc = info.use_tzlocal = info.use_fixed = False - info.delta = NPY_NAT # placeholder - info.utcoffsets = NULL - info.positions = NULL - info.noffsets = 0 - - if tz is None or is_utc(tz): - info.use_utc = True - elif is_tzlocal(tz): - info.use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - info.noffsets = len(deltas) - if typ not in ["pytz", "dateutil"]: - # Fixed Offset - info.use_fixed = True - info.delta = deltas[0] + bint use_utc, use_tzlocal, use_fixed, use_pytz + int noffsets + int64_t* utcoffsets + intp_t* positions + ndarray positions_arr # needed to avoid segfault + int64_t delta + tzinfo tz + + def __cinit__(self, tzinfo tz, int64_t[:] values): + cdef: + ndarray[intp_t, ndim=1] pos + ndarray[int64_t, ndim=1] deltas + + self.use_utc = self.use_tzlocal = self.use_fixed = self.use_pytz = False + self.delta = NPY_NAT # placeholder + self.utcoffsets = NULL + self.positions = NULL + self.noffsets = 0 + self.tz = tz + + if tz is None or is_utc(tz): + self.use_utc = True + elif is_tzlocal(tz): + self.use_tzlocal = True else: - info.utcoffsets = deltas - #info.utcoffsets = cnp.PyArray_DATA(deltas) - pos = trans.searchsorted(values, side="right") - 1 - assert pos.flags["F_CONTIGUOUS"] - assert pos.flags["C_CONTIGUOUS"] - - assert (pos.max() < info.noffsets), (pos.max(), info.noffsets) - assert (pos < info.noffsets).all(), (max(pos), info.noffsets) - #info.positions = cnp.PyArray_DATA(pos) - info.positions = pos - - #pos2 = np.array(info.positions, dtype=np.intp) - #assert pos2.max() < info.noffsets, (pos2.max(), info.noffsets) - - for i in range(n): - p = info.positions[i] - assert p < info.noffsets, (p, info.noffsets) - - return info + trans, deltas, typ = get_dst_info(tz) + self.noffsets = len(deltas) + if typ not in ["pytz", "dateutil"]: + # Fixed Offset + self.use_fixed = True + self.delta = deltas[0] + else: + self.utcoffsets = deltas.data + pos = trans.searchsorted(values, side="right") - 1 + self.positions_arr = pos + self.positions = pos.data + self.use_pytz = typ == "pytz" + + cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i): + cdef: + int64_t local_val + + if self.use_utc: + local_val = utc_value + elif self.use_tzlocal: + local_val = tz_convert_utc_to_tzlocal(utc_value, self.tz) + elif self.use_fixed: + local_val = utc_value + self.delta + else: + local_val = utc_value + self.utcoffsets[self.positions[i]] + return local_val cdef object get_dst_info(tzinfo tz): From 184e188f1e5317b2762c898d82d6464ae2367cdf Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Jul 2020 13:02:34 -0700 Subject: [PATCH 38/38] implement as cdef class --- pandas/_libs/tslibs/resolution.pyx | 5 +-- pandas/_libs/tslibs/timezones.pxd | 13 ------- pandas/_libs/tslibs/timezones.pyx | 55 ---------------------------- pandas/_libs/tslibs/tzconversion.pxd | 15 +++++++- pandas/_libs/tslibs/tzconversion.pyx | 55 ++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 72 deletions(-) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index ec86837bd0639..d6c78cfe27ea0 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -8,9 +8,8 @@ from pandas._libs.tslibs.util cimport get_nat from pandas._libs.tslibs.dtypes import Resolution from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dt64_to_dtstruct) -from pandas._libs.tslibs.timezones cimport TZ from pandas._libs.tslibs.ccalendar cimport get_days_in_month -from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal +from pandas._libs.tslibs.tzconversion cimport Localizer # ---------------------------------------------------------------------- # Constants @@ -39,7 +38,7 @@ def get_resolution(const int64_t[:] stamps, tzinfo tz=None): npy_datetimestruct dts int reso = RESO_DAY, curr_reso int64_t local_val - TZ localizer = TZ(tz, stamps) + Localizer localizer = Localizer(tz, stamps) for i in range(n): if stamps[i] == NPY_NAT: diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 450976492d1eb..aff62fa9bba52 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -17,16 +17,3 @@ cdef timedelta get_utcoffset(tzinfo tz, datetime obj) cdef bint is_fixed_offset(tzinfo tz) cdef object get_dst_info(tzinfo tz) - - -cdef class TZ: - cdef: - bint use_utc, use_tzlocal, use_fixed, use_pytz - int noffsets - int64_t* utcoffsets - intp_t* positions - ndarray positions_arr # needed to avoid segfault - int64_t delta - tzinfo tz - - cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 9e39a92a83baf..8a6b7e0743e90 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -209,61 +209,6 @@ cdef ndarray[int64_t, ndim=1] unbox_utcoffsets(object transinfo): # ---------------------------------------------------------------------- # Daylight Savings -cdef class TZ: - cdef: - bint use_utc, use_tzlocal, use_fixed, use_pytz - int noffsets - int64_t* utcoffsets - intp_t* positions - ndarray positions_arr # needed to avoid segfault - int64_t delta - tzinfo tz - - def __cinit__(self, tzinfo tz, int64_t[:] values): - cdef: - ndarray[intp_t, ndim=1] pos - ndarray[int64_t, ndim=1] deltas - - self.use_utc = self.use_tzlocal = self.use_fixed = self.use_pytz = False - self.delta = NPY_NAT # placeholder - self.utcoffsets = NULL - self.positions = NULL - self.noffsets = 0 - self.tz = tz - - if tz is None or is_utc(tz): - self.use_utc = True - elif is_tzlocal(tz): - self.use_tzlocal = True - else: - trans, deltas, typ = get_dst_info(tz) - self.noffsets = len(deltas) - if typ not in ["pytz", "dateutil"]: - # Fixed Offset - self.use_fixed = True - self.delta = deltas[0] - else: - self.utcoffsets = deltas.data - pos = trans.searchsorted(values, side="right") - 1 - self.positions_arr = pos - self.positions = pos.data - self.use_pytz = typ == "pytz" - - cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i): - cdef: - int64_t local_val - - if self.use_utc: - local_val = utc_value - elif self.use_tzlocal: - local_val = tz_convert_utc_to_tzlocal(utc_value, self.tz) - elif self.use_fixed: - local_val = utc_value + self.delta - else: - local_val = utc_value + self.utcoffsets[self.positions[i]] - return local_val - - cdef object get_dst_info(tzinfo tz): """ Returns diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 7d102868256de..3e1cbc9df0010 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -1,5 +1,5 @@ from cpython.datetime cimport tzinfo -from numpy cimport int64_t +from numpy cimport int64_t, intp_t, ndarray cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=*) @@ -7,3 +7,16 @@ cpdef int64_t tz_convert_single(int64_t val, tzinfo tz1, tzinfo tz2) cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* ) except? -1 + + +cdef class Localizer: + cdef: + bint use_utc, use_tzlocal, use_fixed, use_pytz + int noffsets + int64_t* utcoffsets + intp_t* positions + ndarray positions_arr # needed to avoid segfault + int64_t delta + tzinfo tz + + cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 98c40e109dbab..45dc2b1de39c3 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -29,6 +29,61 @@ from pandas._libs.tslibs.timezones cimport ( ) +cdef class Localizer: + # cdef: + # bint use_utc, use_tzlocal, use_fixed, use_pytz + # int noffsets + # int64_t* utcoffsets + # intp_t* positions + # ndarray positions_arr # needed to avoid segfault + # int64_t delta + # tzinfo tz + + def __cinit__(self, tzinfo tz, int64_t[:] values): + cdef: + ndarray[intp_t, ndim=1] pos + ndarray[int64_t, ndim=1] deltas + + self.use_utc = self.use_tzlocal = self.use_fixed = self.use_pytz = False + self.delta = NPY_NAT # placeholder + self.utcoffsets = NULL + self.positions = NULL + self.noffsets = 0 + self.tz = tz + + if tz is None or is_utc(tz): + self.use_utc = True + elif is_tzlocal(tz): + self.use_tzlocal = True + else: + trans, deltas, typ = get_dst_info(tz) + self.noffsets = len(deltas) + if typ not in ["pytz", "dateutil"]: + # Fixed Offset + self.use_fixed = True + self.delta = deltas[0] + else: + self.utcoffsets = deltas.data + pos = trans.searchsorted(values, side="right") - 1 + self.positions_arr = pos + self.positions = pos.data + self.use_pytz = typ == "pytz" + + cdef inline int64_t get_local_timestamp(self, int64_t utc_value, Py_ssize_t i): + cdef: + int64_t local_val + + if self.use_utc: + local_val = utc_value + elif self.use_tzlocal: + local_val = tz_convert_utc_to_tzlocal(utc_value, self.tz) + elif self.use_fixed: + local_val = utc_value + self.delta + else: + local_val = utc_value + self.utcoffsets[self.positions[i]] + return local_val + + cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None, ) except? -1: