From 1932996606ca82ef3ee8780c5b89921774e0debe Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Sep 2017 11:05:24 -0700 Subject: [PATCH 01/11] move infer_tzinfo from tools.datetime to tslibs.timezones refactor a large timezone-specific chunk of tslib.tz_localize_to_utc to tslibs.timezones._infer_dst --- pandas/_libs/tslib.pyx | 43 +--------------- pandas/_libs/tslibs/timezones.pxd | 6 ++- pandas/_libs/tslibs/timezones.pyx | 81 +++++++++++++++++++++++++++++++ pandas/core/tools/datetimes.py | 18 +------ 4 files changed, 88 insertions(+), 60 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d4ca5af09367e..3f6a4cb06fd63 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -4003,48 +4003,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result_b[i] = v if infer_dst: - dst_hours = np.empty(n, dtype=np.int64) - dst_hours.fill(NPY_NAT) - - # Get the ambiguous hours (given the above, these are the hours - # where result_a != result_b and neither of them are NAT) - both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) - both_eq = result_a == result_b - trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) - if trans_idx.size == 1: - stamp = Timestamp(vals[trans_idx]) - raise pytz.AmbiguousTimeError( - "Cannot infer dst time from %s as there " - "are no repeated times" % stamp) - # Split the array into contiguous chunks (where the difference between - # indices is 1). These are effectively dst transitions in different - # years which is useful for checking that there is not an ambiguous - # transition in an individual year. 
- if trans_idx.size > 0: - one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 - trans_grp = np.array_split(trans_idx, one_diff) - - # Iterate through each day, if there are no hours where the - # delta is negative (indicates a repeat of hour) the switch - # cannot be inferred - for grp in trans_grp: - - delta = np.diff(result_a[grp]) - if grp.size == 1 or np.all(delta > 0): - stamp = Timestamp(vals[grp[0]]) - raise pytz.AmbiguousTimeError(stamp) - - # Find the index for the switch and pull from a for dst and b - # for standard - switch_idx = (delta <= 0).nonzero()[0] - if switch_idx.size > 1: - raise pytz.AmbiguousTimeError( - "There are %i dst switches when " - "there should only be 1." % switch_idx.size) - switch_idx = switch_idx[0] + 1 # Pull the only index and adjust - a_idx = grp[:switch_idx] - b_idx = grp[switch_idx:] - dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) + dst_hours = _infer_dst(vals, result_a, result_b) for i in range(n): left = result_a[i] diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index e5d1343e1c984..a70ce5d40e625 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # cython: profile=False -from numpy cimport ndarray +from numpy cimport ndarray, int64_t cdef bint is_utc(object tz) cdef bint is_tzlocal(object tz) @@ -16,3 +16,7 @@ cpdef get_utcoffset(tzinfo, obj) cdef bint is_fixed_offset(object tz) cdef object get_dst_info(object tz) + +cdef ndarray[int64_t] _infer_dst(ndarray[int64_t] vals, + ndarray[int64_t] result_a, + ndarray[int64_t] result_b) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 48d82996a0bd0..f9a9cc1bf3d03 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -275,3 +275,84 @@ cdef object get_dst_info(object tz): dst_cache[cache_key] = (trans, deltas, typ) return dst_cache[cache_key] + + +def _infer_tzinfo(start, end): + 
def _infer(a, b): + tz = a.tzinfo + if b and b.tzinfo: + if not (get_timezone(tz) == get_timezone(b.tzinfo)): + raise AssertionError('Inputs must both have the same timezone,' + ' {timezone1} != {timezone2}' + .format(timezone1=tz, timezone2=b.tzinfo)) + return tz + + tz = None + if start is not None: + tz = _infer(start, end) + elif end is not None: + tz = _infer(end, start) + return tz + + +cdef ndarray[int64_t] _infer_dst(ndarray[int64_t] vals, + ndarray[int64_t] result_a, + ndarray[int64_t] result_b): + cdef: + Py_ssize_t n = len(vals) + ndarray[int64_t] dst_hours + + dst_hours = np.empty(n, dtype=np.int64) + dst_hours.fill(NPY_NAT) + + # Get the ambiguous hours (given the above, these are the hours + # where result_a != result_b and neither of them are NAT) + both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) + both_eq = result_a == result_b + trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) + if trans_idx.size == 1: + stamp = np.int64(vals[trans_idx]).astype('datetime64[ns]') + # Render `stamp` as e.g. '2017-08-30 07:59:23.123456' + # as opposed to str(stamp) which would + # be '2017-08-30T07:59:23.123456789' + stamp = str(stamp).replace('T', ' ')[:-3] + raise pytz.AmbiguousTimeError( + "Cannot infer dst time from %s as there " + "are no repeated times" % stamp) + + # Split the array into contiguous chunks (where the difference between + # indices is 1). These are effectively dst transitions in different + # years which is useful for checking that there is not an ambiguous + # transition in an individual year. 
+ if trans_idx.size > 0: + one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 + trans_grp = np.array_split(trans_idx, one_diff) + + # Iterate through each day, if there are no hours where the + # delta is negative (indicates a repeat of hour) the switch + # cannot be inferred + for grp in trans_grp: + + delta = np.diff(result_a[grp]) + if grp.size == 1 or np.all(delta > 0): + stamp = np.int64(vals[grp[0]]).astype('datetime64[ns]') + # Render `stamp` as e.g. '2017-08-30 07:59:23.123456' + # as opposed to str(stamp) which would + # be '2017-08-30T07:59:23.123456789' + stamp = str(stamp).replace('T', ' ')[:-3] + raise pytz.AmbiguousTimeError(stamp) + + # Find the index for the switch and pull from a for dst and b + # for standard + switch_idx = (delta <= 0).nonzero()[0] + if switch_idx.size > 1: + raise pytz.AmbiguousTimeError( + "There are %i dst switches when " + "there should only be 1." % switch_idx.size) + + switch_idx = switch_idx[0] + 1 # Pull the only index and adjust + a_idx = grp[:switch_idx] + b_idx = grp[switch_idx:] + dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) + + return dst_hours diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index bf89509fd1746..74409bb64c92e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -4,7 +4,7 @@ from pandas._libs import lib, tslib from pandas._libs.tslibs.strptime import array_strptime -from pandas._libs.tslibs.timezones import get_timezone +from pandas._libs.tslibs.timezones import get_timezone, _infer_tzinfo # noqa from pandas.core.dtypes.common import ( _ensure_object, @@ -42,22 +42,6 @@ def _lexer_split_from_str(dt_str): pass -def _infer_tzinfo(start, end): - def _infer(a, b): - tz = a.tzinfo - if b and b.tzinfo: - if not (get_timezone(tz) == get_timezone(b.tzinfo)): - raise AssertionError('Inputs must both have the same timezone,' - ' {timezone1} != {timezone2}' - .format(timezone1=tz, timezone2=b.tzinfo)) - return tz - - tz = None - if 
start is not None: - tz = _infer(start, end) - elif end is not None: - tz = _infer(end, start) - return tz def _guess_datetime_format(dt_str, dayfirst=False, From 6ccbfb241390e2f80df1908660a886862fe7c7ef Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Sep 2017 11:12:34 -0700 Subject: [PATCH 02/11] whitespace fixup --- pandas/_libs/tslibs/timezones.pxd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index a70ce5d40e625..f979cc9ad705d 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -18,5 +18,5 @@ cdef bint is_fixed_offset(object tz) cdef object get_dst_info(object tz) cdef ndarray[int64_t] _infer_dst(ndarray[int64_t] vals, - ndarray[int64_t] result_a, - ndarray[int64_t] result_b) + ndarray[int64_t] result_a, + ndarray[int64_t] result_b) From adf909919b69cabd60343c3d909dd663b0d0dc7d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Sep 2017 14:36:31 -0700 Subject: [PATCH 03/11] fixup add missing cimport --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3f6a4cb06fd63..0d40a26023534 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -105,7 +105,7 @@ from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_timezone, get_utcoffset, maybe_get_tz, - get_dst_info + get_dst_info, _infer_dst ) From fcd26c1d518eb6821b0355f5e8ef88bd9ad0297f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Sep 2017 15:39:54 -0700 Subject: [PATCH 04/11] de-privatize infer_tzinfo, update imports --- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/tools/datetimes.py | 2 +- pandas/tests/tseries/test_timezones.py | 19 +++++++++---------- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git 
a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index f9a9cc1bf3d03..1700d3a29c1ce 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -277,7 +277,7 @@ cdef object get_dst_info(object tz): return dst_cache[cache_key] -def _infer_tzinfo(start, end): +def infer_tzinfo(start, end): def _infer(a, b): tz = a.tzinfo if b and b.tzinfo: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 39dc24642235b..9127864eab8a1 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -443,7 +443,7 @@ def _generate(cls, start, end, periods, name, offset, raise ValueError("Closed has to be either 'left', 'right' or None") try: - inferred_tz = tools._infer_tzinfo(start, end) + inferred_tz = timezones.infer_tzinfo(start, end) except: raise TypeError('Start and end cannot both be tz-aware with ' 'different timezones') diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 74409bb64c92e..8e44794344b26 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -4,7 +4,7 @@ from pandas._libs import lib, tslib from pandas._libs.tslibs.strptime import array_strptime -from pandas._libs.tslibs.timezones import get_timezone, _infer_tzinfo # noqa +from pandas._libs.tslibs.timezones import get_timezone from pandas.core.dtypes.common import ( _ensure_object, diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index e7b470e01e2af..aa8fe90ea6500 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -12,7 +12,6 @@ from datetime import datetime, timedelta, tzinfo, date import pandas.util.testing as tm -import pandas.core.tools.datetimes as tools import pandas.tseries.offsets as offsets from pandas.compat import lrange, zip from pandas.core.indexes.datetimes import bdate_range, date_range @@ -646,20 +645,20 @@ def test_infer_tz(self): start = 
self.localize(eastern, _start) end = self.localize(eastern, _end) - assert (tools._infer_tzinfo(start, end) is self.localize( - eastern, _start).tzinfo) - assert (tools._infer_tzinfo(start, None) is self.localize( - eastern, _start).tzinfo) - assert (tools._infer_tzinfo(None, end) is self.localize(eastern, - _end).tzinfo) + assert (timezones.infer_tzinfo(start, end) is + self.localize(eastern, _start).tzinfo) + assert (timezones.infer_tzinfo(start, None) is + self.localize(eastern, _start).tzinfo) + assert (timezones.infer_tzinfo(None, end) is + self.localize(eastern, _end).tzinfo) start = utc.localize(_start) end = utc.localize(_end) - assert (tools._infer_tzinfo(start, end) is utc) + assert (timezones.infer_tzinfo(start, end) is utc) end = self.localize(eastern, _end) - pytest.raises(Exception, tools._infer_tzinfo, start, end) - pytest.raises(Exception, tools._infer_tzinfo, end, start) + pytest.raises(Exception, timezones.infer_tzinfo, start, end) + pytest.raises(Exception, timezones.infer_tzinfo, end, start) def test_tz_string(self): result = date_range('1/1/2000', periods=10, From 355cbe88542edf78b5de2e394a8f6c805aeee469 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Sep 2017 17:21:20 -0700 Subject: [PATCH 05/11] rename _infer_dst --> infer_dst_transitions per reviewer request --- pandas/_libs/tslib.pyx | 4 ++-- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 0d40a26023534..53dbe9b850bcc 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -105,7 +105,7 @@ from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_timezone, get_utcoffset, maybe_get_tz, - get_dst_info, _infer_dst + get_dst_info, infer_dst_transitions ) @@ -4003,7 +4003,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result_b[i] = 
v if infer_dst: - dst_hours = _infer_dst(vals, result_a, result_b) + dst_hours = infer_dst_transitions(vals, result_a, result_b) for i in range(n): left = result_a[i] diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index f979cc9ad705d..2737efb1033ef 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -17,6 +17,6 @@ cdef bint is_fixed_offset(object tz) cdef object get_dst_info(object tz) -cdef ndarray[int64_t] _infer_dst(ndarray[int64_t] vals, +cdef ndarray[int64_t] infer_dst_transitions(ndarray[int64_t] vals, ndarray[int64_t] result_a, ndarray[int64_t] result_b) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 1700d3a29c1ce..f3c1434cf786a 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -295,9 +295,9 @@ def infer_tzinfo(start, end): return tz -cdef ndarray[int64_t] _infer_dst(ndarray[int64_t] vals, - ndarray[int64_t] result_a, - ndarray[int64_t] result_b): +cdef ndarray[int64_t] infer_dst_transitions(ndarray[int64_t] vals, + ndarray[int64_t] result_a, + ndarray[int64_t] result_b): cdef: Py_ssize_t n = len(vals) ndarray[int64_t] dst_hours From e17089c8b5449aec8398db1d709a345a9d17c0e0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 25 Sep 2017 17:24:01 -0700 Subject: [PATCH 06/11] Remove nested _infer func --- pandas/_libs/tslibs/timezones.pyx | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index f3c1434cf786a..c85b38311c14e 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -278,20 +278,18 @@ cdef object get_dst_info(object tz): def infer_tzinfo(start, end): - def _infer(a, b): - tz = a.tzinfo - if b and b.tzinfo: - if not (get_timezone(tz) == get_timezone(b.tzinfo)): - raise AssertionError('Inputs must both have the same timezone,' - ' {timezone1} != 
{timezone2}' - .format(timezone1=tz, timezone2=b.tzinfo)) - return tz - - tz = None - if start is not None: - tz = _infer(start, end) + if start is not None and end is not None: + tz = start.tzinfo + if end.tzinfo: + if not (get_timezone(tz) == get_timezone(end.tzinfo)): + msg = 'Inputs must both have the same timezone, {tz1} != {tz2}' + raise AssertionError(msg.format(tz1=tz, tz2=end.tzinfo)) + elif start is not None: + tz = start.tzinfo elif end is not None: - tz = _infer(end, start) + tz = end.tzinfo + else: + tz = None return tz From b82396d33d6b6e2abcce5ab30c36d1f167a2bcdd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 26 Sep 2017 09:24:35 -0700 Subject: [PATCH 07/11] flake8 fixup --- pandas/core/tools/datetimes.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 8e44794344b26..306373269fa7b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -4,7 +4,6 @@ from pandas._libs import lib, tslib from pandas._libs.tslibs.strptime import array_strptime -from pandas._libs.tslibs.timezones import get_timezone from pandas.core.dtypes.common import ( _ensure_object, @@ -42,8 +41,6 @@ def _lexer_split_from_str(dt_str): pass - - def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=compat.parse_date, dt_str_split=_DATEUTIL_LEXER_SPLIT): From 926bb73b39f08321c14cb0111edc1c0b95ad7097 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 26 Sep 2017 20:59:33 -0700 Subject: [PATCH 08/11] fixup remove unused import --- pandas/core/tools/datetimes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a39d066847758..4f7f14cabee97 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -4,7 +4,6 @@ from pandas._libs import tslib from pandas._libs.tslibs.strptime import array_strptime -from pandas._libs.tslibs.timezones import get_timezone from pandas._libs.tslibs 
import parsing from pandas._libs.tslibs.parsing import ( # noqa parse_time_string, From 8f4b368a9938bba09a19fd7e0d8b374ef626953c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 27 Sep 2017 20:14:42 -0700 Subject: [PATCH 09/11] Merge branch 'master' of https://github.com/pandas-dev/pandas into tslibs-timezones7 From 7b4c9b293b6094bf0dda1bafaef49220107a3f30 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 27 Sep 2017 20:17:01 -0700 Subject: [PATCH 10/11] revert infer_dst_transitions --- pandas/_libs/tslib.pyx | 45 +++++++++++++++++++++- pandas/_libs/tslibs/timezones.pxd | 6 --- pandas/_libs/tslibs/timezones.pyx | 63 ------------------------------- 3 files changed, 43 insertions(+), 71 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c77b0842f874f..4c34d0fcb1e5f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -108,7 +108,7 @@ from tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_timezone, get_utcoffset, maybe_get_tz, - get_dst_info, infer_dst_transitions + get_dst_info ) @@ -3735,7 +3735,48 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result_b[i] = v if infer_dst: - dst_hours = infer_dst_transitions(vals, result_a, result_b) + dst_hours = np.empty(n, dtype=np.int64) + dst_hours.fill(NPY_NAT) + + # Get the ambiguous hours (given the above, these are the hours + # where result_a != result_b and neither of them are NAT) + both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) + both_eq = result_a == result_b + trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) + if trans_idx.size == 1: + stamp = Timestamp(vals[trans_idx]) + raise pytz.AmbiguousTimeError( + "Cannot infer dst time from %s as there " + "are no repeated times" % stamp) + # Split the array into contiguous chunks (where the difference between + # indices is 1). 
These are effectively dst transitions in different + # years which is useful for checking that there is not an ambiguous + # transition in an individual year. + if trans_idx.size > 0: + one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 + trans_grp = np.array_split(trans_idx, one_diff) + + # Iterate through each day, if there are no hours where the + # delta is negative (indicates a repeat of hour) the switch + # cannot be inferred + for grp in trans_grp: + + delta = np.diff(result_a[grp]) + if grp.size == 1 or np.all(delta > 0): + stamp = Timestamp(vals[grp[0]]) + raise pytz.AmbiguousTimeError(stamp) + + # Find the index for the switch and pull from a for dst and b + # for standard + switch_idx = (delta <= 0).nonzero()[0] + if switch_idx.size > 1: + raise pytz.AmbiguousTimeError( + "There are %i dst switches when " + "there should only be 1." % switch_idx.size) + switch_idx = switch_idx[0] + 1 # Pull the only index and adjust + a_idx = grp[:switch_idx] + b_idx = grp[switch_idx:] + dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) for i in range(n): left = result_a[i] diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 2737efb1033ef..95e0474b3a174 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False -from numpy cimport ndarray, int64_t - cdef bint is_utc(object tz) cdef bint is_tzlocal(object tz) @@ -16,7 +14,3 @@ cpdef get_utcoffset(tzinfo, obj) cdef bint is_fixed_offset(object tz) cdef object get_dst_info(object tz) - -cdef ndarray[int64_t] infer_dst_transitions(ndarray[int64_t] vals, - ndarray[int64_t] result_a, - ndarray[int64_t] result_b) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index c85b38311c14e..3a2167069f25b 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -291,66 +291,3 @@ def infer_tzinfo(start, end): else: tz = None return tz 
- - -cdef ndarray[int64_t] infer_dst_transitions(ndarray[int64_t] vals, - ndarray[int64_t] result_a, - ndarray[int64_t] result_b): - cdef: - Py_ssize_t n = len(vals) - ndarray[int64_t] dst_hours - - dst_hours = np.empty(n, dtype=np.int64) - dst_hours.fill(NPY_NAT) - - # Get the ambiguous hours (given the above, these are the hours - # where result_a != result_b and neither of them are NAT) - both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) - both_eq = result_a == result_b - trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) - if trans_idx.size == 1: - stamp = np.int64(vals[trans_idx]).astype('datetime64[ns]') - # Render `stamp` as e.g. '2017-08-30 07:59:23.123456' - # as opposed to str(stamp) which would - # be '2017-08-30T07:59:23.123456789' - stamp = str(stamp).replace('T', ' ')[:-3] - raise pytz.AmbiguousTimeError( - "Cannot infer dst time from %s as there " - "are no repeated times" % stamp) - - # Split the array into contiguous chunks (where the difference between - # indices is 1). These are effectively dst transitions in different - # years which is useful for checking that there is not an ambiguous - # transition in an individual year. - if trans_idx.size > 0: - one_diff = np.where(np.diff(trans_idx) != 1)[0] +1 - trans_grp = np.array_split(trans_idx, one_diff) - - # Iterate through each day, if there are no hours where the - # delta is negative (indicates a repeat of hour) the switch - # cannot be inferred - for grp in trans_grp: - - delta = np.diff(result_a[grp]) - if grp.size == 1 or np.all(delta > 0): - stamp = np.int64(vals[grp[0]]).astype('datetime64[ns]') - # Render `stamp` as e.g. 
'2017-08-30 07:59:23.123456' - # as opposed to str(stamp) which would - # be '2017-08-30T07:59:23.123456789' - stamp = str(stamp).replace('T', ' ')[:-3] - raise pytz.AmbiguousTimeError(stamp) - - # Find the index for the switch and pull from a for dst and b - # for standard - switch_idx = (delta <= 0).nonzero()[0] - if switch_idx.size > 1: - raise pytz.AmbiguousTimeError( - "There are %i dst switches when " - "there should only be 1." % switch_idx.size) - - switch_idx = switch_idx[0] + 1 # Pull the only index and adjust - a_idx = grp[:switch_idx] - b_idx = grp[switch_idx:] - dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) - - return dst_hours From ce701e5b75c1d84b15a752bf8f48959fbe629a4d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Sep 2017 21:03:41 -0700 Subject: [PATCH 11/11] dummy commit to force CI --- pandas/_libs/tslibs/timezones.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 3a2167069f25b..7f778dde86e23 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- # cython: profile=False +# cython: linetrace=False +# distutils: define_macros=CYTHON_TRACE=0 +# distutils: define_macros=CYTHON_TRACE_NOGIL=0 cimport cython from cython cimport Py_ssize_t