From 84bfff2bf9d03054bcf33765392ece900bf4b9a8 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 11:54:26 +0300 Subject: [PATCH 001/128] TST: add basic test for construction with fold --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/tests/indexes/datetimes/test_constructors.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4915671aa6512..321ebe6fa58c4 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -351,7 +351,8 @@ class Timestamp(_Timestamp): second=None, microsecond=None, nanosecond=None, - tzinfo=None + tzinfo=None, + fold=None ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 68285d41bda70..39d7b47dd35a4 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -964,3 +964,13 @@ def test_timestamp_constructor_identity(): expected = pd.Timestamp("2017-01-01T12") result = pd.Timestamp(expected) assert result is expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) +@pytest.mark.parametrize("fold", [0, 1]) +def test_timestamp_constructor_fold(tz, fold): + # Test for #25057 + ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) + result = ts.fold + expected = fold + assert result == expected From ba7fcd53e7aa9038192dca19a964c2394758a662 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 15:24:53 +0300 Subject: [PATCH 002/128] ENH: add basic fold support --- pandas/_libs/tslib.pyx | 20 ++++++++++---------- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 8 ++++---- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 53e3354ca8eb6..4dccf5cb25a01 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -49,7 +49,7 @@ from pandas._libs.tslibs.tzconversion cimport ( cdef inline object create_datetime_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.datetime from its parts """ return datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) @@ -57,14 +57,14 @@ cdef inline object create_datetime_from_ts( cdef inline object create_date_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.date from its parts """ return date(dts.year, dts.month, dts.day) cdef inline object create_time_from_ts( int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a datetime.time from its parts """ return time(dts.hour, dts.min, dts.sec, dts.us, tz) @@ -72,7 +72,7 @@ cdef inline object create_time_from_ts( @cython.wraparound(False) @cython.boundscheck(False) def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, - str box="datetime"): + bint fold=0, str box="datetime"): """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp @@ -104,7 +104,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, str typ int64_t value, delta, local_value ndarray[object] result = np.empty(n, dtype=object) - object (*func_create)(int64_t, npy_datetimestruct, object, object) + object (*func_create)(int64_t, npy_datetimestruct, object, object, bint) if box == "date": assert (tz is None), "tz should be None when converting to date" @@ -129,7 +129,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, result[i] = NaT else: dt64_to_dtstruct(value, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) elif is_tzlocal(tz): for i in range(n): value = arr[i] @@ -141,7 +141,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, # using the i8 representation. local_value = tz_convert_utc_to_tzlocal(value, tz) dt64_to_dtstruct(local_value, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) else: trans, deltas, typ = get_dst_info(tz) @@ -155,7 +155,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, else: # Adjust datetime64 timestamp, recompute datetimestruct dt64_to_dtstruct(value + delta, &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) elif typ == 'dateutil': # no zone-name change for dateutil tzs - dst etc @@ -168,7 +168,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, tz, freq) + result[i] = func_create(value, dts, tz, freq, fold) else: # pytz for i in range(n): @@ -182,7 +182,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, new_tz = tz._tzinfos[tz._transition_info[pos]] dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, new_tz, freq) + result[i] = func_create(value, dts, new_tz, freq, fold) return result diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index b7282e02ff117..5e55e6e8d5297 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -5,4 +5,4 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct cdef object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq) + object tz, object freq, bint fold) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 321ebe6fa58c4..f7da7613e605d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -42,12 +42,12 @@ _no_input = object() cdef inline object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, - object tz, object freq): + object tz, object freq, bint fold): """ convenience routine to construct a Timestamp from its parts """ cdef _Timestamp ts_base ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) + dts.sec, dts.us, tz, fold=fold) ts_base.value = value ts_base.freq = freq ts_base.nanosecond = dts.ps // 1000 @@ -442,7 +442,7 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): if self.tz is not None: @@ -986,7 +986,7 @@ default 'raise' if value != NPY_NAT: check_dts_bounds(&dts) - return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) + return create_timestamp_from_ts(value, dts, _tzinfo, self.freq, fold) def isoformat(self, sep='T'): base = super(_Timestamp, self).isoformat(sep=sep) From 0b6f8940145564f3aeed30130136ad5fc6238c2d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 15:33:55 +0300 Subject: [PATCH 003/128] ENH: add fold to ts properties --- pandas/_libs/tslibs/conversion.pyx | 2 ++ pandas/_libs/tslibs/timestamps.pyx | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index e0862b9250045..9dfa176a5aa9b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -246,6 +246,8 @@ cdef convert_to_tsobject(object ts, object tz, object unit, tz = maybe_get_tz(tz) obj = _TSObject() + # TODO: remove after moving to localize_tso and convert_str_to_tsobject + obj.fold = None if isinstance(ts, str): return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f7da7613e605d..166837a3c8961 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -442,7 +442,10 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, fold) + if ts.fold is None: + ts.fold = fold + + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): if self.tz is not None: From 5c58b3a6f75fcee42b09504b2492da30c0bdf927 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 16:01:21 +0300 Subject: [PATCH 004/128] baseline fold in conversion --- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 9dfa176a5aa9b..9795769fc9610 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -247,7 +247,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj = _TSObject() # TODO: remove after moving to localize_tso and convert_str_to_tsobject - obj.fold = None + # obj.fold = None if isinstance(ts, str): return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 166837a3c8961..ac0657dcbcd0a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -442,10 +442,10 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) - if ts.fold is None: - ts.fold = fold + #if ts.fold is None: + # ts.fold = fold - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): if self.tz is not None: From 546789a54bc3de113e28df890880720dec15f034 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 16:20:49 +0300 Subject: [PATCH 005/128] add fold placeholder to conversion --- pandas/_libs/tslibs/conversion.pxd | 1 + pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index c74307a3d2887..bb20296e24587 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -12,6 +12,7 @@ cdef class _TSObject: npy_datetimestruct dts # npy_datetimestruct int64_t value # numpy dt64 object tzinfo + bint fold cdef convert_to_tsobject(object ts, object tz, object unit, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 9795769fc9610..cab773a08de0b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -247,7 +247,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj = _TSObject() # TODO: remove after moving to localize_tso and convert_str_to_tsobject - # obj.fold = None + obj.fold = 0 if isinstance(ts, str): return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ac0657dcbcd0a..a7a7973d96438 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -442,8 +442,8 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) - #if ts.fold is None: - # ts.fold = fold + if ts.fold == 0 and fold == 1: + ts.fold = fold return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, fold) From fc69bbb41c9dee4dcf28e1049ce61b01a3708c0f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 16:43:46 +0300 Subject: [PATCH 006/128] add fold to convert_to_tsobject --- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 3 ++- pandas/_libs/tslibs/timestamps.pyx | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index bb20296e24587..51db4a8e2e3c8 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -17,7 +17,7 @@ cdef class _TSObject: cdef convert_to_tsobject(object ts, object tz, object unit, bint dayfirst, bint yearfirst, - int32_t nanos=*) + int32_t nanos=*, bint fold=*) cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, int32_t nanos=*) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index cab773a08de0b..fe02f471df572 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -224,7 +224,8 @@ cdef class _TSObject: cdef convert_to_tsobject(object ts, object tz, object unit, - bint dayfirst, bint yearfirst, int32_t nanos=0): + bint dayfirst, bint yearfirst, int32_t nanos=0, + bint fold=0): """ Extract datetime and int64 from any of: - np.int64 (with unit providing a possible modifier) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index a7a7973d96438..526965d5a44ba 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -431,7 +431,7 @@ class Timestamp(_Timestamp): raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. Use tz_convert instead.") - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, fold) if ts.value == NPY_NAT: return NaT @@ -442,6 +442,7 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) + # TODO: remove after incorporating fold into conversion if ts.fold == 0 and fold == 1: ts.fold = fold From 57d42b30e124c7d6e1832fcf3cb2f4f4cb1ab635 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 16:59:50 +0300 Subject: [PATCH 007/128] TST: add test to infer fold from value --- .../tests/indexes/datetimes/test_constructors.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 39d7b47dd35a4..61095c9962c7d 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -968,9 +968,23 @@ def test_timestamp_constructor_identity(): @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @pytest.mark.parametrize("fold", [0, 1]) -def test_timestamp_constructor_fold(tz, fold): +def test_timestamp_constructor_retain_fold(tz, fold): # Test for #25057 + # Check that we retain fold ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, tz=tz, fold=fold) result = ts.fold expected = fold assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) +@pytest.mark.parametrize( + "value_fold", [(1572139800000000000, 0), (1572143400000000000, 1)] +) +def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): + # Test for #25057 + # Check that we infer fold correctly based on timestamps since utc + ts = pd.Timestamp(value_fold[0], tz=tz) + result = ts.fold + expected = value_fold[1] + assert result == expected From f2ad196dad87fbf331dc0c2d084c8996851e172d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 19:26:46 +0300 Subject: [PATCH 008/128] ENH: infer fold from value for dateutil --- pandas/_libs/tslibs/conversion.pyx | 12 ++++++++++-- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/tests/indexes/datetimes/test_constructors.py | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe02f471df572..0f64182aedc1a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -292,7 +292,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, f'Timestamp') if tz is not None: - localize_tso(obj, tz) + localize_tso(obj, tz, fold) if obj.value != NPY_NAT: # check_overflows needs to run after localize_tso @@ -523,7 +523,7 @@ cdef inline check_overflows(_TSObject obj): # ---------------------------------------------------------------------- # Localization -cdef inline void localize_tso(_TSObject obj, tzinfo tz): +cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): """ Given the UTC nanosecond timestamp in obj.value, find the wall-clock representation of that timestamp in the given timezone. @@ -532,6 +532,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): ---------- obj : _TSObject tz : tzinfo + fold: bint + TODO: Update docstring with info on how we infer or update fold Returns ------- @@ -574,6 +576,12 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): # i.e. treat_tz_as_dateutil(tz) pos = trans.searchsorted(obj.value, side='right') - 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + # Check if we are in a fold + tti = tz._get_ttinfo(pos - 1) + if not(tti.isdst): + od = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + od): + obj.fold = 1 else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 526965d5a44ba..40de2c065a726 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -446,7 +446,7 @@ class Timestamp(_Timestamp): if ts.fold == 0 and fold == 1: ts.fold = fold - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, fold) + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): if self.tz is not None: diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 61095c9962c7d..0eb77b337c856 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -979,7 +979,7 @@ def test_timestamp_constructor_retain_fold(tz, fold): @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @pytest.mark.parametrize( - "value_fold", [(1572139800000000000, 0), (1572143400000000000, 1)] + "value_fold", [(1572136200000000000, 0), (1572139800000000000, 1)] ) def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): # Test for #25057 From 935a3ecc623c1e8a76a32798fadd76170d7e68bc Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 19:32:31 +0300 Subject: [PATCH 009/128] ENH: infer fold for pytz (loss in performance) --- pandas/_libs/tslibs/conversion.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0f64182aedc1a..1cd012de4373f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -572,6 +572,10 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): pos = trans.searchsorted(obj.value, side='right') - 1 tz = tz._tzinfos[tz._transition_info[pos]] dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + # Check if we are in a fold + od = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + od): + obj.fold = 1 elif typ == 'dateutil': # i.e. treat_tz_as_dateutil(tz) pos = trans.searchsorted(obj.value, side='right') - 1 From d35af8fd9020eef9e7d504ad1694e89e204c4b51 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 19:54:34 +0300 Subject: [PATCH 010/128] PERF: remove unnecessary ifs --- pandas/_libs/tslibs/conversion.pyx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 1cd012de4373f..992744878ee63 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -581,11 +581,9 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): pos = trans.searchsorted(obj.value, side='right') - 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Check if we are in a fold - tti = tz._get_ttinfo(pos - 1) - if not(tti.isdst): - od = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + od): - obj.fold = 1 + od = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + od): + obj.fold = 1 else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, From e6d4aaa39d66f470638e25096643c3a6409169f1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 30 Jan 2020 20:08:34 +0300 Subject: [PATCH 011/128] check that we are not at the left edge of deltas --- pandas/_libs/tslibs/conversion.pyx | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 992744878ee63..d0b33e43ffa67 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -559,6 +559,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): elif is_tzlocal(tz): local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) dt64_to_dtstruct(local_val, &obj.dts) + # TODO: think on how we can infer fold for local Timezone else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) @@ -573,17 +574,19 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): tz = tz._tzinfos[tz._transition_info[pos]] dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Check if we are in a fold - od = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + od): - obj.fold = 1 + if pos > 0: + od = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + od): + obj.fold = 1 elif typ == 'dateutil': # i.e. treat_tz_as_dateutil(tz) pos = trans.searchsorted(obj.value, side='right') - 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Check if we are in a fold - od = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + od): - obj.fold = 1 + if pos > 0: + od = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + od): + obj.fold = 1 else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, From fd98b27b0fc36841db1d7e18b42c3e4fc1ee8075 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 09:19:31 +0300 Subject: [PATCH 012/128] ENH: adjust Timestamp.value for fold (if not from datetime or string) --- pandas/_libs/tslibs/conversion.pyx | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index d0b33e43ffa67..b627b9820bf2c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -572,20 +572,34 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): # i.e. treat_tz_as_pytz(tz) pos = trans.searchsorted(obj.value, side='right') - 1 tz = tz._tzinfos[tz._transition_info[pos]] + # Adjust value if fold was supplied + if fold == 1: + # Check if valid fold value + if pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Check if we are in a fold if pos > 0: - od = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + od): + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + fold_delta): obj.fold = 1 elif typ == 'dateutil': # i.e. treat_tz_as_dateutil(tz) pos = trans.searchsorted(obj.value, side='right') - 1 + # Adjust value if fold was supplied + if fold == 1: + # Check if valid fold value + if pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Check if we are in a fold if pos > 0: - od = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + od): + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + fold_delta): obj.fold = 1 else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ From af86f7923ff2067381cc1c458b1143b7ff4d0ec7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 09:21:30 +0300 Subject: [PATCH 013/128] CLN: remove extra spaces --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index b627b9820bf2c..9eafda2d1ad5f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -578,7 +578,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): if pos < len(deltas): fold_delta = deltas[pos] - deltas[pos + 1] if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta + obj.value += fold_delta dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Check if we are in a fold if pos > 0: @@ -594,7 +594,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): if pos < len(deltas): fold_delta = deltas[pos] - deltas[pos + 1] if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta + obj.value += fold_delta dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Check if we are in a fold if pos > 0: From a6965e96e09b941cdea1e90607c90aff276c6120 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 09:26:50 +0300 Subject: [PATCH 014/128] TST: add test for adjusting value for fold --- .../indexes/datetimes/test_constructors.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 0eb77b337c856..302146b5a96e7 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -988,3 +988,21 @@ def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): result = ts.fold expected = value_fold[1] assert result == expected + + +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) +@pytest.mark.parametrize( + "value_fold", + [ + (1572136200000000000, 1, 1572139800000000000), + (1572139800000000000, 1, 1572139800000000000), + ], +) +def test_timestamp_constructor_adjust_value_for_fold(tz, value_fold): + # Test for #25057 + # Check that we adjust value for fold correctly + # based on timestamps since utc + ts = pd.Timestamp(value_fold[0], tz=tz, fold=value_fold[1]) + result = ts.value + expected = value_fold[2] + assert result == expected From 4caf9bb98c8a03db063eef77e225b76167b124e3 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 09:31:10 +0300 Subject: [PATCH 015/128] DOC: add to comment for local timezone --- pandas/_libs/tslibs/conversion.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 9eafda2d1ad5f..a2884b86dc41b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -560,6 +560,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) dt64_to_dtstruct(local_val, &obj.dts) # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) From 6843ed286091b0170422f0cefcf6cf8b65f3a153 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 09:38:35 +0300 Subject: [PATCH 016/128] TST: add string case to fold inferring test --- pandas/tests/indexes/datetimes/test_constructors.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 302146b5a96e7..9a6791a140bcb 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -979,11 +979,18 @@ def test_timestamp_constructor_retain_fold(tz, fold): @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @pytest.mark.parametrize( - "value_fold", [(1572136200000000000, 0), (1572139800000000000, 1)] + "value_fold", + [ + (1572136200000000000, 0), + (1572139800000000000, 1), + ("2019-10-27 01:30:00+01:00", 0), + ("2019-10-27 01:30:00+00:00", 1), + ], ) def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): # Test for #25057 # Check that we infer fold correctly based on timestamps since utc + # or strings ts = pd.Timestamp(value_fold[0], tz=tz) result = ts.fold expected = value_fold[1] From ebbf21fa79f34e46f1920ce402efcb481bfd75cc Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 14:06:03 +0300 Subject: [PATCH 017/128] ENH: infer fold for timestamp from string --- pandas/_libs/tslibs/conversion.pxd | 3 ++- pandas/_libs/tslibs/conversion.pyx | 30 ++++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 51db4a8e2e3c8..93577692e48c8 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -20,7 +20,8 @@ cdef convert_to_tsobject(object ts, object tz, object unit, int32_t nanos=*, bint fold=*) cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, - int32_t nanos=*) + int32_t nanos=*, bint fold=*, + int64_t fold_delta=*) cdef int64_t get_datetime64_nanos(object val) except? -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a2884b86dc41b..fef13de8b38f0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -302,7 +302,8 @@ cdef convert_to_tsobject(object ts, object tz, object unit, cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, - int32_t nanos=0): + int32_t nanos=0, bint fold=0, + int64_t fold_delta=0): """ Convert a datetime (or Timestamp) input `ts`, along with optional timezone object `tz` to a _TSObject. @@ -366,6 +367,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 + if fold_delta: + obj.value += fold_delta + obj.fold = fold + check_dts_bounds(&obj.dts) check_overflows(obj) return obj @@ -401,12 +406,33 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, check_overflows(obj) return obj + # Offset supplied, so infer fold + obj.fold = 0 + fold_delta = 0 + if is_utc(tz): + pass + elif is_tzlocal(tz): + pass + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + if typ == 'pytz' or typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + # Check if we are in a fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + fold_delta): + obj.fold = 1 + # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, obj.tzinfo) obj = convert_datetime_to_tsobject( - dt, tz, nanos=obj.dts.ps // 1000) + dt, tz, nanos=obj.dts.ps // 1000, fold=obj.fold, fold_delta=fold_delta) return obj From 4f436387a70ea0561ca88122babb64583da80348 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 14:11:35 +0300 Subject: [PATCH 018/128] TST: add fold value adjustment from string --- pandas/tests/indexes/datetimes/test_constructors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 9a6791a140bcb..46f178e120961 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1003,6 +1003,8 @@ def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): [ (1572136200000000000, 1, 1572139800000000000), (1572139800000000000, 1, 1572139800000000000), + ("2019-10-27 01:30:00", 0, 1572136200000000000), + ("2019-10-27 01:30:00", 1, 1572139800000000000), ], ) def test_timestamp_constructor_adjust_value_for_fold(tz, value_fold): From 6238f9bc67ad071b18e059c5820b8a917e1df9d1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 14:16:29 +0300 Subject: [PATCH 019/128] ENH: adjust value for fold from string --- pandas/_libs/tslibs/conversion.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fef13de8b38f0..d188f4aa1155e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -251,7 +251,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.fold = 0 if isinstance(ts, str): - return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst, fold) if ts is None or ts is NaT: obj.value = NPY_NAT @@ -438,7 +438,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, bint dayfirst=False, - bint yearfirst=False): + bint yearfirst=False, bint fold=0): """ Convert a string input `ts`, along with optional timezone object`tz` to a _TSObject. @@ -499,8 +499,9 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, ts = dtstruct_to_dt64(&dts) if tz is not None: # shift for localize_tso + # TODO: maybe change fold type to object to allow None ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous='raise')[0] + ambiguous=not(fold))[0] except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil From 358479145af8f1619cf74e4ae76c7c9a6340c2c0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 15:38:33 +0300 Subject: [PATCH 020/128] basic from datetime fold support with bugs --- pandas/_libs/tslibs/conversion.pyx | 30 +++++++++++++++++-- .../indexes/datetimes/test_constructors.py | 2 ++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index d188f4aa1155e..3bbf3cd9652ce 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -279,7 +279,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = ts dt64_to_dtstruct(ts, &obj.dts) elif PyDateTime_Check(ts): - return convert_datetime_to_tsobject(ts, tz, nanos) + return convert_datetime_to_tsobject(ts, tz, nanos, fold) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) @@ -367,9 +367,33 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 - if fold_delta: + obj.fold = fold + if fold_delta > 0: obj.value += fold_delta - obj.fold = fold + + # Datetime puts us into a fold for an ambiguous timestamp + # adjust as necessary + print("Started printout") + print(obj.value) + if obj.fold == 0: + if tz is not None: + if is_utc(tz): + pass + elif is_tzlocal(tz): + pass + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + if typ == 'pytz' or typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + # Check if we are in a fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.value -= fold_delta check_dts_bounds(&obj.dts) check_overflows(obj) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 46f178e120961..ed0bae112331d 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1005,6 +1005,8 @@ def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): (1572139800000000000, 1, 1572139800000000000), ("2019-10-27 01:30:00", 0, 1572136200000000000), ("2019-10-27 01:30:00", 1, 1572139800000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), ], ) def test_timestamp_constructor_adjust_value_for_fold(tz, value_fold): From 237341b4b97023df44ff573da3d640af01c8f80a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 15:56:52 +0300 Subject: [PATCH 021/128] complete adjust value for fold from datetime --- pandas/_libs/tslibs/conversion.pyx | 50 +++++++++++++++++------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 3bbf3cd9652ce..175d17a40a3f0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -327,6 +327,9 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, cdef: _TSObject obj = _TSObject() + # TODO: get fold from datetime if it isn't supplied + # change fold to object type first + if tz is not None: tz = maybe_get_tz(tz) @@ -373,27 +376,31 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, # Datetime puts us into a fold for an ambiguous timestamp # adjust as necessary - print("Started printout") - print(obj.value) - if obj.fold == 0: - if tz is not None: - if is_utc(tz): - pass - elif is_tzlocal(tz): - pass - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) - - if typ == 'pytz' or typ == 'dateutil': - pos = trans.searchsorted(obj.value, side='right') - 1 - # Check if we are in a fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.value -= fold_delta + if tz is not None: + if is_utc(tz): + pass + elif is_tzlocal(tz): + pass + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold + else: + trans, deltas, typ = get_dst_info(tz) + + # pytz assumes we are in a fold, dateutil - that we are not + if typ == 'pytz' and obj.fold == 0: + pos = trans.searchsorted(obj.value, side='right') - 1 + # Check if we are in a fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.value -= fold_delta + elif typ == 'dateutil' and obj.fold == 1: + pos = trans.searchsorted(obj.value, side='right') - 1 + # Check if we are before a fold + if pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta check_dts_bounds(&obj.dts) check_overflows(obj) @@ -440,7 +447,6 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # TODO: think on how we can infer fold for local Timezone # and adjust value for fold else: - # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) if typ == 'pytz' or typ == 'dateutil': From 12b8b4ea72a96687bbb30ab119e3d8bdb6634ab7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 16:10:40 +0300 Subject: [PATCH 022/128] use input fold as default --- pandas/_libs/tslibs/timestamps.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 40de2c065a726..d66e1a8f33f35 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -431,6 +431,9 @@ class Timestamp(_Timestamp): raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. Use tz_convert instead.") + if getattr(ts_input, 'fold', None) is not None and fold is None: + fold = ts_input.fold + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, fold) if ts.value == NPY_NAT: From 92e990e6f784c43c5c393092c818812f906a76a9 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 16:12:02 +0300 Subject: [PATCH 023/128] TST: adjust datetime for included fold --- pandas/tests/indexes/datetimes/test_constructors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index ed0bae112331d..3316115d4084d 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1007,6 +1007,8 @@ def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): ("2019-10-27 01:30:00", 1, 1572139800000000000), (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), None, 1572136200000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), None, 1572139800000000000), ], ) def test_timestamp_constructor_adjust_value_for_fold(tz, value_fold): From c2189d3c208fd65b3f63d9e471d4816569cef1b5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 16:13:09 +0300 Subject: [PATCH 024/128] TST: infer fold from fold included in datetime --- pandas/tests/indexes/datetimes/test_constructors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 3316115d4084d..64836e4d34993 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -985,6 +985,8 @@ def test_timestamp_constructor_retain_fold(tz, fold): (1572139800000000000, 1), ("2019-10-27 01:30:00+01:00", 0), ("2019-10-27 01:30:00+00:00", 1), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), ], ) def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): From 9c9c2dd174ac235e7a139616130fb8d8701944be Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 19:05:16 +0300 Subject: [PATCH 025/128] FIX: fix bool condition for inferring fold for str --- pandas/_libs/tslibs/conversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 175d17a40a3f0..c4a1ea4917f5a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -371,7 +371,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.dts.ps = nanos * 1000 obj.fold = fold - if fold_delta > 0: + if obj.fold == 1: obj.value += fold_delta # Datetime puts us into a fold for an ambiguous timestamp From f0bbbcb9fa1902d1226435756faed1bf966976d4 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 20:32:47 +0300 Subject: [PATCH 026/128] remove unnecessary value shift for inferring fold from str --- pandas/_libs/tslibs/conversion.pxd | 3 +-- pandas/_libs/tslibs/conversion.pyx | 15 ++++----------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 93577692e48c8..ed933fd76cf89 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -20,8 +20,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, int32_t nanos=*, bint fold=*) cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, - int32_t nanos=*, bint fold=*, - int64_t fold_delta=*) + int32_t nanos=*, bint fold=*) cdef int64_t get_datetime64_nanos(object val) except? -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c4a1ea4917f5a..2339c96acbf5e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -302,8 +302,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, - int32_t nanos=0, bint fold=0, - int64_t fold_delta=0): + int32_t nanos=0, bint fold=0): """ Convert a datetime (or Timestamp) input `ts`, along with optional timezone object `tz` to a _TSObject. @@ -327,9 +326,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, cdef: _TSObject obj = _TSObject() - # TODO: get fold from datetime if it isn't supplied - # change fold to object type first - if tz is not None: tz = maybe_get_tz(tz) @@ -371,8 +367,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.dts.ps = nanos * 1000 obj.fold = fold - if obj.fold == 1: - obj.value += fold_delta # Datetime puts us into a fold for an ambiguous timestamp # adjust as necessary @@ -438,8 +432,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, return obj # Offset supplied, so infer fold - obj.fold = 0 - fold_delta = 0 + fold = 0 if is_utc(tz): pass elif is_tzlocal(tz): @@ -455,14 +448,14 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] if obj.value < (trans[pos] + fold_delta): - obj.fold = 1 + fold = 1 # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, obj.tzinfo) obj = convert_datetime_to_tsobject( - dt, tz, nanos=obj.dts.ps // 1000, fold=obj.fold, fold_delta=fold_delta) + dt, tz, nanos=obj.dts.ps // 1000, fold=fold) return obj From ca460786b7e0e4f10b248a4a56e746f1e0d1db42 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 31 Jan 2020 23:58:40 +0300 Subject: [PATCH 027/128] remove unnecessary GH24329 fix --- pandas/_libs/tslibs/conversion.pyx | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2339c96acbf5e..761e3a437a625 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -344,14 +344,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.tzinfo = tz else: obj.value = pydatetime_to_dt64(ts, &obj.dts) - # GH 24329 When datetime is ambiguous, - # pydatetime_to_dt64 doesn't take DST into account - # but with dateutil timezone, get_utcoffset does - # so we need to correct for it - if treat_tz_as_dateutil(ts.tzinfo): - if ts.tzinfo.is_ambiguous(ts): - dst_offset = ts.tzinfo.dst(ts) - obj.value += int(dst_offset.total_seconds() * 1e9) obj.tzinfo = ts.tzinfo if obj.tzinfo is not None and not is_utc(obj.tzinfo): From 411b0360587c0db0de44db3d6e6329858ac30f55 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 09:19:52 +0300 Subject: [PATCH 028/128] pass fold through Timstamp.replace --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d66e1a8f33f35..8b9a70e1aa9be 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -988,7 +988,7 @@ default 'raise' 'fold': fold} ts_input = datetime(**kwargs) - ts = convert_datetime_to_tsobject(ts_input, _tzinfo) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo, nanos=0, fold=fold) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: check_dts_bounds(&dts) From 464dadf186f24b8e7df50c40be1244c0e349bd8a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 09:45:12 +0300 Subject: [PATCH 029/128] TST:fix ambiguous compatibility test in scalar test_timezones --- pandas/tests/scalar/timestamp/test_timezones.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 6537f6ccd8432..cfa7da810ada1 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -140,7 +140,7 @@ def test_tz_localize_ambiguous_compat(self): # see gh-14621 assert result_pytz.to_pydatetime().tzname() == "GMT" assert result_dateutil.to_pydatetime().tzname() == "BST" - assert str(result_pytz) != str(result_dateutil) + assert str(result_pytz) == str(result_dateutil) # 1 hour difference result_pytz = naive.tz_localize(pytz_zone, ambiguous=1) From 2b9f2f6c4538781f97f9d2051db7d025cac9b832 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 09:48:23 +0300 Subject: [PATCH 030/128] TST: remove test for ambiguous error near dst boundary Default behaviour near DST boundary is now for fold=0 --- pandas/tests/scalar/timestamp/test_timezones.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index cfa7da810ada1..912904c62fe95 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -327,9 +327,6 @@ def test_timestamp_constructor_near_dst_boundary(self): expected = Timestamp("2015-10-25 01:00").tz_localize(tz) assert result == expected - with pytest.raises(pytz.AmbiguousTimeError): - Timestamp("2015-10-25 02:00", tz=tz) - result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") expected = Timestamp("2017-03-26 01:00").tz_localize("Europe/Paris") assert result == expected From ef87010ae057aa0a5996112c546201d655343b1a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 10:54:12 +0300 Subject: [PATCH 031/128] DOC: add fold arg description to functions and methods --- pandas/_libs/tslibs/conversion.pyx | 21 +++++++++++++++++++-- pandas/_libs/tslibs/timestamps.pyx | 6 ++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 761e3a437a625..43ca43bd5ade2 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -235,6 +235,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, - iso8601 string object - python datetime object - another timestamp object + TODO: update docstring in general and with information on fold Raises ------ @@ -318,6 +319,12 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, timezone for the timezone-aware output nanos : int32_t, default is 0 nanoseconds supplement the precision of the datetime input ts + fold : bint, default is 0 + whether we are in a fold or not. Due to daylight saving time, + one wall clock time can occur twice when shifting from summer to + winter time; fold describes whether the datetime-like corresponds + to the first (0) or the second time (1) the wall clock hits the + ambiguous time Returns ------- @@ -473,6 +480,12 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, yearfirst : bool, default False When parsing an ambiguous date string, interpret e.g. "01/05/09" as "May 9, 2001", as opposed to the default "Jan 5, 2009" + fold : bint, default is 0 + whether we are in a fold or not. Due to daylight saving time, + one wall clock time can occur twice when shifting from summer to + winter time; fold describes whether the datetime-like corresponds + to the first (0) or the second time (1) the wall clock hits the + ambiguous time Returns ------- @@ -574,8 +587,12 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): ---------- obj : _TSObject tz : tzinfo - fold: bint - TODO: Update docstring with info on how we infer or update fold + fold : bint + whether we are in a fold or not. Due to daylight saving time, + one wall clock time can occur twice when shifting from summer to + winter time; fold describes whether the datetime-like corresponds + to the first (0) or the second time (1) the wall clock hits the + ambiguous time Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8b9a70e1aa9be..d793891396613 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -196,6 +196,12 @@ class Timestamp(_Timestamp): nanosecond : int, optional, default 0 .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None + fold : int, default is None + whether we are in a fold or not. Due to daylight saving time, + one wall clock time can occur twice when shifting from summer to + winter time; fold describes whether the datetime-like corresponds + to the first (0) or the second time (1) the wall clock hits the + ambiguous time Notes ----- From ecfde580de4f0e2ad8d11f6e8e038ad7b748377f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 11:05:19 +0300 Subject: [PATCH 032/128] REFACTOR: remove code duplication in localize_tso --- pandas/_libs/tslibs/conversion.pyx | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 43ca43bd5ade2..80e1412bdd105 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -628,26 +628,11 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): # static/fixed tzinfo; in this case we know len(deltas) == 1 # This can come back with `typ` of either "fixed" or None dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) - elif typ == 'pytz': + elif typ == 'pytz' or typ == 'dateutil': # i.e. treat_tz_as_pytz(tz) pos = trans.searchsorted(obj.value, side='right') - 1 - tz = tz._tzinfos[tz._transition_info[pos]] - # Adjust value if fold was supplied - if fold == 1: - # Check if valid fold value - if pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - # Check if we are in a fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + fold_delta): - obj.fold = 1 - elif typ == 'dateutil': - # i.e. treat_tz_as_dateutil(tz) - pos = trans.searchsorted(obj.value, side='right') - 1 + if typ == 'pytz': + tz = tz._tzinfos[tz._transition_info[pos]] # Adjust value if fold was supplied if fold == 1: # Check if valid fold value From 6970ed12142fd8a2c3ae199d62dd2b6063ea5e25 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 11:33:23 +0300 Subject: [PATCH 033/128] DOC: add fold description to ints_to_pydatetime --- pandas/_libs/tslib.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4dccf5cb25a01..ce50692d09638 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -83,6 +83,12 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, convert to this timezone freq : str/Offset, default None freq to convert + fold : bint, default is 0 + whether we are in a fold or not. Due to daylight saving time, + one wall clock time can occur twice when shifting from summer to + winter time; fold describes whether the datetime-like corresponds + to the first (0) or the second time (1) the wall clock hits the + ambiguous time box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' If datetime, convert to datetime.datetime If date, convert to datetime.date From 353e554665c28e5df73af401731385ab3cf0b34d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 12:13:08 +0300 Subject: [PATCH 034/128] REFACTOR: make obj.fold assignment process more transparent --- pandas/_libs/tslibs/conversion.pyx | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 80e1412bdd105..b72ef9b65f2a1 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -248,7 +248,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, tz = maybe_get_tz(tz) obj = _TSObject() - # TODO: remove after moving to localize_tso and convert_str_to_tsobject + obj.fold = 0 if isinstance(ts, str): @@ -365,8 +365,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 - obj.fold = fold - + obj.fold = 0 # Datetime puts us into a fold for an ambiguous timestamp # adjust as necessary if tz is not None: @@ -380,14 +379,14 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, trans, deltas, typ = get_dst_info(tz) # pytz assumes we are in a fold, dateutil - that we are not - if typ == 'pytz' and obj.fold == 0: + if typ == 'pytz' and fold == 0: pos = trans.searchsorted(obj.value, side='right') - 1 # Check if we are in a fold if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] if obj.value - fold_delta < trans[pos]: obj.value -= fold_delta - elif typ == 'dateutil' and obj.fold == 1: + elif typ == 'dateutil' and fold == 1: pos = trans.searchsorted(obj.value, side='right') - 1 # Check if we are before a fold if pos < len(deltas): @@ -395,6 +394,15 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if obj.value + fold_delta > trans[pos + 1]: obj.value += fold_delta + # Check if we are in a fold + if typ == 'pytz' or typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + fold_delta): + obj.fold = 1 + check_dts_bounds(&obj.dts) check_overflows(obj) return obj @@ -611,6 +619,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): assert obj.tzinfo is None + obj.fold = 0 if is_utc(tz): pass elif obj.value == NPY_NAT: From bce8f0de78464df25bb8316dc0878b947615e3db Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 12:21:26 +0300 Subject: [PATCH 035/128] DOC: clarify comments for fold adjustment and inferring --- pandas/_libs/tslibs/conversion.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index b72ef9b65f2a1..42386413a8a6f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -378,6 +378,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, else: trans, deltas, typ = get_dst_info(tz) + # adjust value for fold # pytz assumes we are in a fold, dateutil - that we are not if typ == 'pytz' and fold == 0: pos = trans.searchsorted(obj.value, side='right') - 1 @@ -394,7 +395,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if obj.value + fold_delta > trans[pos + 1]: obj.value += fold_delta - # Check if we are in a fold + # Infer fold if typ == 'pytz' or typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 @@ -451,7 +452,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, if typ == 'pytz' or typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - # Check if we are in a fold + # Infer fold if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] if obj.value < (trans[pos] + fold_delta): @@ -650,7 +651,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): if obj.value + fold_delta > trans[pos + 1]: obj.value += fold_delta dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - # Check if we are in a fold + # Infer fold if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] if obj.value < (trans[pos] + fold_delta): From 558c2375e21770e30f0c3be38a6f7ff0ec480f09 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 12:54:12 +0300 Subject: [PATCH 036/128] REFACTOR: compact datetime_to_tsobject and refactor checks --- pandas/_libs/tslibs/conversion.pyx | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 42386413a8a6f..7a9df97cc0466 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -378,30 +378,26 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, else: trans, deltas, typ = get_dst_info(tz) - # adjust value for fold - # pytz assumes we are in a fold, dateutil - that we are not - if typ == 'pytz' and fold == 0: + if typ == 'pytz' or typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - # Check if we are in a fold - if pos > 0: + + # Adjust for fold + # pytz assumes we are in a fold, dateutil - that we are not + if typ == 'pytz' and fold == 0 and pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] if obj.value - fold_delta < trans[pos]: obj.value -= fold_delta - elif typ == 'dateutil' and fold == 1: - pos = trans.searchsorted(obj.value, side='right') - 1 - # Check if we are before a fold - if pos < len(deltas): + pos -= 1 + elif typ == 'dateutil' and fold == 1 and pos < len(deltas): fold_delta = deltas[pos] - deltas[pos + 1] if obj.value + fold_delta > trans[pos + 1]: obj.value += fold_delta + pos += 1 - # Infer fold - if typ == 'pytz' or typ == 'dateutil': - pos = trans.searchsorted(obj.value, side='right') - 1 - + # Infer fold if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + fold_delta): + if obj.value - fold_delta < trans[pos]: obj.fold = 1 check_dts_bounds(&obj.dts) @@ -654,7 +650,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): # Infer fold if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + fold_delta): + if obj.value - fold_delta < trans[pos]: obj.fold = 1 else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ From 7b88ffdf048fac6fcacddb0c25c9841f521cda8d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 12:56:55 +0300 Subject: [PATCH 037/128] DOC: clarify comments for datetime_to_tsobject --- pandas/_libs/tslibs/conversion.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7a9df97cc0466..7b294ea045437 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -366,8 +366,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.dts.ps = nanos * 1000 obj.fold = 0 - # Datetime puts us into a fold for an ambiguous timestamp - # adjust as necessary if tz is not None: if is_utc(tz): pass @@ -381,7 +379,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if typ == 'pytz' or typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - # Adjust for fold + # obj.value includes tz assumptions, need to adjust # pytz assumes we are in a fold, dateutil - that we are not if typ == 'pytz' and fold == 0 and pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] From 9621c0ab50695ee4686c3f8e7026c382d013695c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 13:48:11 +0300 Subject: [PATCH 038/128] add pos shift for adjusting value for fold in localize_tso --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7b294ea045437..7f4347398ff46 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -433,7 +433,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, check_overflows(obj) return obj - # Offset supplied, so infer fold + # Can infer fold from offset-adjusted obj.value fold = 0 if is_utc(tz): pass @@ -530,7 +530,6 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, ts = dtstruct_to_dt64(&dts) if tz is not None: # shift for localize_tso - # TODO: maybe change fold type to object to allow None ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, ambiguous=not(fold))[0] @@ -644,6 +643,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): fold_delta = deltas[pos] - deltas[pos + 1] if obj.value + fold_delta > trans[pos + 1]: obj.value += fold_delta + pos += 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Infer fold if pos > 0: From 6bf58b53d145fc0665ebf445be33d256dac17eb7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 1 Feb 2020 15:34:36 +0300 Subject: [PATCH 039/128] CLN: remove unnecessary comment --- pandas/_libs/tslibs/conversion.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7f4347398ff46..a522a974f9661 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -632,7 +632,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): # This can come back with `typ` of either "fixed" or None dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) elif typ == 'pytz' or typ == 'dateutil': - # i.e. treat_tz_as_pytz(tz) pos = trans.searchsorted(obj.value, side='right') - 1 if typ == 'pytz': tz = tz._tzinfos[tz._transition_info[pos]] From 73364af02681062d4c1d79e2595f97946c07a1d5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 2 Feb 2020 09:42:58 +0300 Subject: [PATCH 040/128] try to speed up datetime processing --- pandas/_libs/tslibs/conversion.pyx | 52 ++++++++++++++---------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a522a974f9661..7657dad9d3b63 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -29,7 +29,8 @@ from pandas._libs.tslibs.util cimport ( from pandas._libs.tslibs.timedeltas cimport cast_from_unit from pandas._libs.tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info, - get_timezone, maybe_get_tz, tz_compare, treat_tz_as_dateutil) + get_timezone, maybe_get_tz, tz_compare, treat_tz_as_dateutil, + treat_tz_as_pytz) from pandas._libs.tslibs.timezones import UTC from pandas._libs.tslibs.parsing import parse_datetime_string @@ -367,36 +368,31 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.fold = 0 if tz is not None: - if is_utc(tz): - pass - elif is_tzlocal(tz): - pass - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold - else: + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold + if treat_tz_as_dateutil(tz) or treat_tz_as_pytz(tz): trans, deltas, typ = get_dst_info(tz) - if typ == 'pytz' or typ == 'dateutil': - pos = trans.searchsorted(obj.value, side='right') - 1 - - # obj.value includes tz assumptions, need to adjust - # pytz assumes we are in a fold, dateutil - that we are not - if typ == 'pytz' and fold == 0 and pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.value -= fold_delta - pos -= 1 - elif typ == 'dateutil' and fold == 1 and pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - pos += 1 + pos = trans.searchsorted(obj.value, side='right') - 1 - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.fold = 1 + # obj.value includes tz assumptions, need to adjust + # pytz assumes we are in a fold, dateutil - that we are not + if typ == 'pytz' and fold == 0 and pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.value -= fold_delta + pos -= 1 + elif typ == 'dateutil' and fold == 1 and pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta + pos += 1 + + # Infer fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.fold = 1 check_dts_bounds(&obj.dts) check_overflows(obj) From a8ad96caecfec345168d5c8d52e4fab825dfd200 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 2 Feb 2020 09:54:14 +0300 Subject: [PATCH 041/128] Revert "try to speed up datetime processing" - does not work --- pandas/_libs/tslibs/conversion.pyx | 52 ++++++++++++++++-------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7657dad9d3b63..a522a974f9661 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -29,8 +29,7 @@ from pandas._libs.tslibs.util cimport ( from pandas._libs.tslibs.timedeltas cimport cast_from_unit from pandas._libs.tslibs.timezones cimport ( is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info, - get_timezone, maybe_get_tz, tz_compare, treat_tz_as_dateutil, - treat_tz_as_pytz) + get_timezone, maybe_get_tz, tz_compare, treat_tz_as_dateutil) from pandas._libs.tslibs.timezones import UTC from pandas._libs.tslibs.parsing import parse_datetime_string @@ -368,31 +367,36 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.fold = 0 if tz is not None: - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold - if treat_tz_as_dateutil(tz) or treat_tz_as_pytz(tz): + if is_utc(tz): + pass + elif is_tzlocal(tz): + pass + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold + else: trans, deltas, typ = get_dst_info(tz) - pos = trans.searchsorted(obj.value, side='right') - 1 - - # obj.value includes tz assumptions, need to adjust - # pytz assumes we are in a fold, dateutil - that we are not - if typ == 'pytz' and fold == 0 and pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.value -= fold_delta - pos -= 1 - elif typ == 'dateutil' and fold == 1 and pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - pos += 1 + if typ == 'pytz' or typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + + # obj.value includes tz assumptions, need to adjust + # pytz assumes we are in a fold, dateutil - that we are not + if typ == 'pytz' and fold == 0 and pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.value -= fold_delta + pos -= 1 + elif typ == 'dateutil' and fold == 1 and pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta + pos += 1 - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.fold = 1 + # Infer fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.fold = 1 check_dts_bounds(&obj.dts) check_overflows(obj) From 104c97d4c58186b80eb67975d9f0fb2b2cccfbe7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 2 Feb 2020 10:06:04 +0300 Subject: [PATCH 042/128] CLN: remove unnecessary fold assignment in timestamps --- pandas/_libs/tslibs/timestamps.pyx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d793891396613..5bfa1bf6e4c86 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -451,10 +451,6 @@ class Timestamp(_Timestamp): elif not is_offset_object(freq): freq = to_offset(freq) - # TODO: remove after incorporating fold into conversion - if ts.fold == 0 and fold == 1: - ts.fold = fold - return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq, ts.fold) def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): From 1f9e81020a243ce6ff65e7aa685b40df3f87e6af Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 2 Feb 2020 10:28:41 +0300 Subject: [PATCH 043/128] DOC: add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e07a8fa0469f4..c65b48a40ad92 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -36,8 +36,27 @@ For example: ser["2014"] ser.loc["May 2015"] +.. _whatsnew_110.timestamp_fold_support: + +Fold argument support in Timestamp constructor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Timestamp: now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments. + +For example: + .. _whatsnew_110.enhancements.other: +.. ipython:: python + + ts = Timestamp("2019-10-27 01:30:00+00:00") + ts.fold + +.. ipython:: python + + ts = Timestamp("2019-10-27 01:30:00", fold=1) + ts + Other enhancements ^^^^^^^^^^^^^^^^^^ From 55f0b8ad022f390b0060599b0f0d5d7343af1440 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 2 Feb 2020 11:11:58 +0300 Subject: [PATCH 044/128] DOC: fix whatsnew --- doc/source/whatsnew/v1.1.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c65b48a40ad92..2aeeb4d757fcf 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -45,8 +45,6 @@ Fold argument support in Timestamp constructor For example: -.. _whatsnew_110.enhancements.other: - .. ipython:: python ts = Timestamp("2019-10-27 01:30:00+00:00") @@ -57,6 +55,8 @@ For example: ts = Timestamp("2019-10-27 01:30:00", fold=1) ts +.. _whatsnew_110.enhancements.other: + Other enhancements ^^^^^^^^^^^^^^^^^^ From 106508585f08796ebd799f65ed7d25b352467c24 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 3 Feb 2020 09:38:14 +0300 Subject: [PATCH 045/128] CLN: finish merging --- .../tests/indexes/datetimes/test_constructors.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index dec0369cbe895..44c18f2ef6e8f 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -950,21 +950,6 @@ def test_datetimeindex_constructor_misc(self): ) assert len(idx1) == len(idx2) assert idx1.freq == idx2.freq -<<<<<<< HEAD - - -def test_timedelta_constructor_identity(): - # Test for #30543 - expected = pd.Timedelta(np.timedelta64(1, "s")) - result = pd.Timedelta(expected) - assert result is expected - - -def test_timestamp_constructor_identity(): - # Test for #30543 - expected = pd.Timestamp("2017-01-01T12") - result = pd.Timestamp(expected) - assert result is expected @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) From f9c69566657e20a0e132768db3daf51d2ec57a14 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 3 Feb 2020 11:11:33 +0300 Subject: [PATCH 046/128] cut the logic to benchmark function signatures --- pandas/_libs/tslibs/conversion.pyx | 60 ------------------------------ 1 file changed, 60 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2d0ba15503016..97b0d2e45fb8d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -366,37 +366,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.dts.ps = nanos * 1000 obj.fold = 0 - if tz is not None: - if is_utc(tz): - pass - elif is_tzlocal(tz): - pass - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold - else: - trans, deltas, typ = get_dst_info(tz) - - if typ == 'pytz' or typ == 'dateutil': - pos = trans.searchsorted(obj.value, side='right') - 1 - - # obj.value includes tz assumptions, need to adjust - # pytz assumes we are in a fold, dateutil - that we are not - if typ == 'pytz' and fold == 0 and pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.value -= fold_delta - pos -= 1 - elif typ == 'dateutil' and fold == 1 and pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - pos += 1 - - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.fold = 1 check_dts_bounds(&obj.dts) check_overflows(obj) @@ -435,22 +404,6 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Can infer fold from offset-adjusted obj.value fold = 0 - if is_utc(tz): - pass - elif is_tzlocal(tz): - pass - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold - else: - trans, deltas, typ = get_dst_info(tz) - - if typ == 'pytz' or typ == 'dateutil': - pos = trans.searchsorted(obj.value, side='right') - 1 - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + fold_delta): - fold = 1 # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, @@ -635,20 +588,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): pos = trans.searchsorted(obj.value, side='right') - 1 if typ == 'pytz': tz = tz._tzinfos[tz._transition_info[pos]] - # Adjust value if fold was supplied - if fold == 1: - # Check if valid fold value - if pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - pos += 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.fold = 1 else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, From 62d5d6bbdff28f444fa3dcd491a177d50d194c89 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 3 Feb 2020 13:40:03 +0300 Subject: [PATCH 047/128] Revert "cut the logic to benchmark function signatures" - return to original --- pandas/_libs/tslibs/conversion.pyx | 60 ++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 97b0d2e45fb8d..2d0ba15503016 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -366,6 +366,37 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.dts.ps = nanos * 1000 obj.fold = 0 + if tz is not None: + if is_utc(tz): + pass + elif is_tzlocal(tz): + pass + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold + else: + trans, deltas, typ = get_dst_info(tz) + + if typ == 'pytz' or typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + + # obj.value includes tz assumptions, need to adjust + # pytz assumes we are in a fold, dateutil - that we are not + if typ == 'pytz' and fold == 0 and pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.value -= fold_delta + pos -= 1 + elif typ == 'dateutil' and fold == 1 and pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta + pos += 1 + + # Infer fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.fold = 1 check_dts_bounds(&obj.dts) check_overflows(obj) @@ -404,6 +435,22 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Can infer fold from offset-adjusted obj.value fold = 0 + if is_utc(tz): + pass + elif is_tzlocal(tz): + pass + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold + else: + trans, deltas, typ = get_dst_info(tz) + + if typ == 'pytz' or typ == 'dateutil': + pos = trans.searchsorted(obj.value, side='right') - 1 + # Infer fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value < (trans[pos] + fold_delta): + fold = 1 # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, @@ -588,7 +635,20 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): pos = trans.searchsorted(obj.value, side='right') - 1 if typ == 'pytz': tz = tz._tzinfos[tz._transition_info[pos]] + # Adjust value if fold was supplied + if fold == 1: + # Check if valid fold value + if pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta + pos += 1 dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + # Infer fold + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.fold = 1 else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, From 6294ee99e6a59b92feb61c36c010629c5b11c592 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 3 Feb 2020 15:27:30 +0300 Subject: [PATCH 048/128] PERF: change fold to fold or 0 --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5bfa1bf6e4c86..800b6eacd6976 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -440,7 +440,8 @@ class Timestamp(_Timestamp): if getattr(ts_input, 'fold', None) is not None and fold is None: fold = ts_input.fold - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, fold) + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, + fold or 0) if ts.value == NPY_NAT: return NaT From f13e3d73746e685a127d0d2e9ca2d91e6f1127a0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 3 Feb 2020 19:21:04 +0300 Subject: [PATCH 049/128] CLN: clean up TODOs --- pandas/_libs/tslibs/conversion.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2d0ba15503016..c3555e21fb61c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -235,7 +235,6 @@ cdef convert_to_tsobject(object ts, object tz, object unit, - iso8601 string object - python datetime object - another timestamp object - TODO: update docstring in general and with information on fold Raises ------ From eade807a9224694fd1ad643b3add73a360ad75a0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 4 Feb 2020 10:12:19 +0300 Subject: [PATCH 050/128] CLN: trim comments, add issues to whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/_libs/tslibs/conversion.pyx | 8 ++++---- pandas/_libs/tslibs/timestamps.pyx | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 966a01b7814dd..ca0f096926259 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -41,7 +41,7 @@ For example: Fold argument support in Timestamp constructor ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:class:`Timestamp: now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments. +:class:`Timestamp: now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). For example: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c3555e21fb61c..d7cfe6a3a1fdd 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -319,7 +319,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, nanos : int32_t, default is 0 nanoseconds supplement the precision of the datetime input ts fold : bint, default is 0 - whether we are in a fold or not. Due to daylight saving time, + Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the @@ -483,7 +483,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, When parsing an ambiguous date string, interpret e.g. "01/05/09" as "May 9, 2001", as opposed to the default "Jan 5, 2009" fold : bint, default is 0 - whether we are in a fold or not. Due to daylight saving time, + Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the @@ -530,7 +530,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, if tz is not None: # shift for localize_tso ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous=not(fold))[0] + ambiguous=not fold)[0] except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil @@ -589,7 +589,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): obj : _TSObject tz : tzinfo fold : bint - whether we are in a fold or not. Due to daylight saving time, + Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 800b6eacd6976..4e3390eff16b3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -197,7 +197,7 @@ class Timestamp(_Timestamp): .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None fold : int, default is None - whether we are in a fold or not. Due to daylight saving time, + Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the From 7269f9acfea2ed2135dc51a5fd3cd7b41fd48103 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 4 Feb 2020 10:16:28 +0300 Subject: [PATCH 051/128] fold: set Tiemstamp default to 0, reformat description --- pandas/_libs/tslibs/conversion.pyx | 27 ++++++++++++--------------- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index d7cfe6a3a1fdd..c612415b35644 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -319,11 +319,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, nanos : int32_t, default is 0 nanoseconds supplement the precision of the datetime input ts fold : bint, default is 0 - Due to daylight saving time, - one wall clock time can occur twice when shifting from summer to - winter time; fold describes whether the datetime-like corresponds - to the first (0) or the second time (1) the wall clock hits the - ambiguous time + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time Returns ------- @@ -483,11 +482,10 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, When parsing an ambiguous date string, interpret e.g. "01/05/09" as "May 9, 2001", as opposed to the default "Jan 5, 2009" fold : bint, default is 0 - Due to daylight saving time, - one wall clock time can occur twice when shifting from summer to - winter time; fold describes whether the datetime-like corresponds - to the first (0) or the second time (1) the wall clock hits the - ambiguous time + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time Returns ------- @@ -589,11 +587,10 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): obj : _TSObject tz : tzinfo fold : bint - Due to daylight saving time, - one wall clock time can occur twice when shifting from summer to - winter time; fold describes whether the datetime-like corresponds - to the first (0) or the second time (1) the wall clock hits the - ambiguous time + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4e3390eff16b3..92a6080a1888f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -196,7 +196,7 @@ class Timestamp(_Timestamp): nanosecond : int, optional, default 0 .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None - fold : int, default is None + fold : int, default is 0 Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds @@ -358,7 +358,7 @@ class Timestamp(_Timestamp): microsecond=None, nanosecond=None, tzinfo=None, - fold=None + fold=0 ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. From d650086fde6dddac2652863d8df735b6542c7c3e Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 4 Feb 2020 10:21:06 +0300 Subject: [PATCH 052/128] rollback Timestamp fold default to None Default 0 breaks tests --- pandas/_libs/tslibs/timestamps.pyx | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 92a6080a1888f..4469a680bbeee 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -196,12 +196,11 @@ class Timestamp(_Timestamp): nanosecond : int, optional, default 0 .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None - fold : int, default is 0 - Due to daylight saving time, - one wall clock time can occur twice when shifting from summer to - winter time; fold describes whether the datetime-like corresponds - to the first (0) or the second time (1) the wall clock hits the - ambiguous time + fold : int, default is None + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time Notes ----- @@ -358,7 +357,7 @@ class Timestamp(_Timestamp): microsecond=None, nanosecond=None, tzinfo=None, - fold=0 + fold=None ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. From 693cb6c68f7fd31350777bb5403ab31e0d33e0f6 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 4 Feb 2020 11:45:46 +0300 Subject: [PATCH 053/128] REFACT: move value adjustment for fold to function --- pandas/_libs/tslibs/conversion.pyx | 69 +++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c612415b35644..6aefaae5df08d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -379,16 +379,12 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, # obj.value includes tz assumptions, need to adjust # pytz assumes we are in a fold, dateutil - that we are not - if typ == 'pytz' and fold == 0 and pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.value -= fold_delta - pos -= 1 - elif typ == 'dateutil' and fold == 1 and pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - pos += 1 + if (typ == 'pytz' and fold == 0): + pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, + fold) + elif (typ == 'dateutil' and fold == 1): + pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, + fold) # Infer fold if pos > 0: @@ -633,12 +629,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): tz = tz._tzinfos[tz._transition_info[pos]] # Adjust value if fold was supplied if fold == 1: - # Check if valid fold value - if pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - pos += 1 + pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, fold) dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) # Infer fold if pos > 0: @@ -654,6 +645,52 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): obj.tzinfo = tz +cdef inline int32_t _adjust_tsobject_for_fold(_TSObject obj, object trans, + object deltas, int32_t pos, + bint fold): + """ + Adjust _TSObject value for fold is possible. Return updated last offset + transition position in the trans list. + + Parameters + ---------- + obj : _TSObject + trans : object + List of offset transition points in nanoseconds since epoch. + deltas : object + List of offsets corresponding to transition points in trans. + pos : int32_t + Position of the last transition point before taking fold into account. + fold : bint + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + + Returns + ------- + int32_t + Position of the last transition point after taking fold into account. + + Notes + ----- + Alters obj.value inplace. + """ + if fold == 0: + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.value -= fold_delta + pos -= 1 + elif fold == 1: + if pos < len(deltas): + fold_delta = deltas[pos] - deltas[pos + 1] + if obj.value + fold_delta > trans[pos + 1]: + obj.value += fold_delta + pos += 1 + + return pos + cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. From 2fe9ce7e8baa3319781872d90f1e62a7b9af6642 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 4 Feb 2020 12:52:04 +0300 Subject: [PATCH 054/128] REFACT: move inferring fold to a function --- pandas/_libs/tslibs/conversion.pyx | 54 +++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6aefaae5df08d..ff884eefd5390 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -386,11 +386,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, fold) - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.fold = 1 + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) check_dts_bounds(&obj.dts) check_overflows(obj) @@ -440,11 +436,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, if typ == 'pytz' or typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value < (trans[pos] + fold_delta): - fold = 1 + fold = _infer_tsobject_fold(obj, trans, deltas, pos) # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, @@ -631,11 +623,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): if fold == 1: pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, fold) dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - # Infer fold - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.fold = 1 + + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ # either pytz or dateutil have is_fixed_offset(tz) == True, @@ -691,6 +680,41 @@ cdef inline int32_t _adjust_tsobject_for_fold(_TSObject obj, object trans, return pos + +cdef inline bint _infer_tsobject_fold(_TSObject obj, object trans, + object deltas, int32_t pos): + """ + Infer _TSObject fold property from value by assuming 0 and then setting + to 1 if necessary. + + Parameters + ---------- + obj : _TSObject + trans : object + List of offset transition points in nanoseconds since epoch. + deltas : object + List of offsets corresponding to transition points in trans. + pos : int32_t + Position of the last transition point before taking fold into account. + + Returns + ------- + bint + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time + """ + cdef: + bint fold = 0 + + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + fold = 1 + + return fold + cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. From 3e2c76c61713db28990020375a6c57e36b3a8340 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 4 Feb 2020 12:53:19 +0300 Subject: [PATCH 055/128] CLN: remove unnecessary whitespace --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ff884eefd5390..87258c0bdba17 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -640,14 +640,14 @@ cdef inline int32_t _adjust_tsobject_for_fold(_TSObject obj, object trans, """ Adjust _TSObject value for fold is possible. Return updated last offset transition position in the trans list. - + Parameters ---------- obj : _TSObject trans : object List of offset transition points in nanoseconds since epoch. deltas : object - List of offsets corresponding to transition points in trans. + List of offsets corresponding to transition points in trans. pos : int32_t Position of the last transition point before taking fold into account. fold : bint From 2f4fdda6f1391a06c328f62db7fa944dd0771dc9 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 5 Feb 2020 11:02:51 +0300 Subject: [PATCH 056/128] DOC: trim fold arg description in tslib.pyx --- pandas/_libs/tslib.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ce50692d09638..5de6fc8976a35 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -84,11 +84,10 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, freq : str/Offset, default None freq to convert fold : bint, default is 0 - whether we are in a fold or not. Due to daylight saving time, - one wall clock time can occur twice when shifting from summer to - winter time; fold describes whether the datetime-like corresponds - to the first (0) or the second time (1) the wall clock hits the - ambiguous time + Due to daylight saving time, one wall clock time can occur twice + when shifting from summer to winter time; fold describes whether the + datetime-like corresponds to the first (0) or the second time (1) + the wall clock hits the ambiguous time box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' If datetime, convert to datetime.datetime If date, convert to datetime.date From 9f7a16ebbbf98028ad3763d51f8c6b554a94725f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 5 Feb 2020 11:30:38 +0300 Subject: [PATCH 057/128] REFACT: tighten code in datetime_to_tsobject --- pandas/_libs/tslibs/conversion.pyx | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 87258c0bdba17..665d63d3d87a3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -376,16 +376,12 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if typ == 'pytz' or typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - - # obj.value includes tz assumptions, need to adjust - # pytz assumes we are in a fold, dateutil - that we are not - if (typ == 'pytz' and fold == 0): - pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, - fold) - elif (typ == 'dateutil' and fold == 1): + # pytz assumes fold == 1, dateutil fold == 0 + # adjust only if necessary + if (typ == 'pytz' and fold == 0) or \ + (typ == 'dateutil' and fold == 1): pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, fold) - obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) check_dts_bounds(&obj.dts) From a6d37ea86a40950c1a519de9e79ce8cdc06a5cf3 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 5 Feb 2020 11:51:03 +0300 Subject: [PATCH 058/128] REFACT: set TSObject default fold to 0 --- pandas/_libs/tslibs/conversion.pyx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 665d63d3d87a3..0a8051a3f4426 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -216,6 +216,11 @@ cdef class _TSObject: # npy_datetimestruct dts # npy_datetimestruct # int64_t value # numpy dt64 # object tzinfo + # bint fold, default 0 + + # Set fold to 0 by default + def __cinit__(self): + self.fold = 0 @property def value(self): @@ -248,8 +253,6 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj = _TSObject() - obj.fold = 0 - if isinstance(ts, str): return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst, fold) @@ -363,7 +366,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 - obj.fold = 0 if tz is not None: if is_utc(tz): pass @@ -593,7 +595,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): assert obj.tzinfo is None - obj.fold = 0 if is_utc(tz): pass elif obj.value == NPY_NAT: From a0179538a53e6a74b6545a5c2ded4c3e258b6ad9 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 5 Feb 2020 11:59:48 +0300 Subject: [PATCH 059/128] CLN: lint multiline if in datetime_to_tsobject --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0a8051a3f4426..5ab0abc5a988e 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -380,8 +380,8 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, pos = trans.searchsorted(obj.value, side='right') - 1 # pytz assumes fold == 1, dateutil fold == 0 # adjust only if necessary - if (typ == 'pytz' and fold == 0) or \ - (typ == 'dateutil' and fold == 1): + if ((typ == 'pytz' and fold == 0) or + (typ == 'dateutil' and fold == 1)): pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, fold) obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) From 1d716e7ff46a0d5f018d38fe9140bafc7fa550cc Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 6 Feb 2020 09:27:39 +0300 Subject: [PATCH 060/128] TST: reparametrize tests --- .../tests/indexes/datetimes/test_constructors.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 44c18f2ef6e8f..656bda17a9ef0 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -965,7 +965,7 @@ def test_timestamp_constructor_retain_fold(tz, fold): @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @pytest.mark.parametrize( - "value_fold", + "ts_input,fold_out", [ (1572136200000000000, 0), (1572139800000000000, 1), @@ -975,19 +975,19 @@ def test_timestamp_constructor_retain_fold(tz, fold): (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 1), ], ) -def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): +def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): # Test for #25057 # Check that we infer fold correctly based on timestamps since utc # or strings - ts = pd.Timestamp(value_fold[0], tz=tz) + ts = pd.Timestamp(ts_input, tz=tz) result = ts.fold - expected = value_fold[1] + expected = fold_out assert result == expected @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @pytest.mark.parametrize( - "value_fold", + "ts_input,fold,value_out", [ (1572136200000000000, 1, 1572139800000000000), (1572139800000000000, 1, 1572139800000000000), @@ -999,11 +999,11 @@ def test_timestamp_constructor_infer_fold_from_value(tz, value_fold): (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), None, 1572139800000000000), ], ) -def test_timestamp_constructor_adjust_value_for_fold(tz, value_fold): +def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): # Test for #25057 # Check that we adjust value for fold correctly # based on timestamps since utc - ts = pd.Timestamp(value_fold[0], tz=tz, fold=value_fold[1]) + ts = pd.Timestamp(ts_input, tz=tz, fold=fold) result = ts.value - expected = value_fold[2] + expected = value_out assert result == expected From b47efe007025108fd204cbba76ec9e33f3198730 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 6 Feb 2020 11:08:30 +0300 Subject: [PATCH 061/128] raise if ts_input.fold and fold do not match --- pandas/_libs/tslibs/timestamps.pyx | 11 +++++++--- .../indexes/datetimes/test_constructors.py | 22 +++++++++++++++++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4469a680bbeee..e4b311ddb54a5 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -397,6 +397,14 @@ class Timestamp(_Timestamp): # User passed tzinfo instead of tz; avoid silently ignoring tz, tzinfo = tzinfo, None + if getattr(ts_input, 'fold', None) is not None: + if fold is not None: + if ts_input.fold != fold: + raise ValueError("Cannot pass datetime or Timestamp with fold " + "attribute no matching passed fold argument.") + else: + fold = ts_input.fold + # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed # checking verbosely, because cython doesn't optimize @@ -436,9 +444,6 @@ class Timestamp(_Timestamp): raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. Use tz_convert instead.") - if getattr(ts_input, 'fold', None) is not None and fold is None: - fold = ts_input.fold - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, fold or 0) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 656bda17a9ef0..586c4083df6a4 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -952,6 +952,25 @@ def test_datetimeindex_constructor_misc(self): assert idx1.freq == idx2.freq +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) +@pytest.mark.parametrize( + "ts_input,fold", + [ + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 1), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 0), + ], +) +def test_timestamp_constructor_fold_conflict(tz, ts_input, fold): + # Test for #25057 + # Check that we raise on fold conflict + msg = ( + "Cannot pass datetime or Timestamp with fold " + "attribute no matching passed fold argument." + ) + with pytest.raises(ValueError, match=msg): + Timestamp(ts_input, tz=tz, fold=fold) + + @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @pytest.mark.parametrize("fold", [0, 1]) def test_timestamp_constructor_retain_fold(tz, fold): @@ -993,8 +1012,7 @@ def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): (1572139800000000000, 1, 1572139800000000000), ("2019-10-27 01:30:00", 0, 1572136200000000000), ("2019-10-27 01:30:00", 1, 1572139800000000000), - (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), - (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0, 1572136200000000000), (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), None, 1572136200000000000), (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), None, 1572139800000000000), ], From cf7c091f23c8c495e6de661369318675ef2fc10c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 6 Feb 2020 15:30:35 +0300 Subject: [PATCH 062/128] restart tests From 75e1633a84b28c4b12b7fe85d1f1dd67c5733208 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 7 Feb 2020 10:49:58 +0300 Subject: [PATCH 063/128] add ambiguous timezone to whatsnew example --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8baf1a0247986..5773bbb9c4407 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -52,7 +52,7 @@ For example: .. ipython:: python - ts = Timestamp("2019-10-27 01:30:00", fold=1) + ts = Timestamp("2019-10-27 01:30:00", fold=1, tz="Europe/London") ts .. _whatsnew_110.enhancements.other: From 21883bee4ded1d1c8c9e6270a4efbcce5d74eebe Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 7 Feb 2020 10:52:06 +0300 Subject: [PATCH 064/128] combine is_utc and is_tzlocal --- pandas/_libs/tslibs/conversion.pyx | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5ab0abc5a988e..8e4e8076ea118 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -367,12 +367,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.dts.ps = nanos * 1000 if tz is not None: - if is_utc(tz): - pass - elif is_tzlocal(tz): - pass + if is_utc(tz) or is_tzlocal(tz): # TODO: think on how we can infer fold for local Timezone # and adjust value for fold + pass else: trans, deltas, typ = get_dst_info(tz) @@ -423,12 +421,10 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Can infer fold from offset-adjusted obj.value fold = 0 - if is_utc(tz): - pass - elif is_tzlocal(tz): - pass + if is_utc(tz) or is_tzlocal(tz): # TODO: think on how we can infer fold for local Timezone # and adjust value for fold + pass else: trans, deltas, typ = get_dst_info(tz) From b21cb4729f17dbbe7d8328f9fa1bf0ca3e5acac5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 7 Feb 2020 10:56:07 +0300 Subject: [PATCH 065/128] remove inner parenthesis --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 8e4e8076ea118..ab786a27a4aa0 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -378,8 +378,8 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, pos = trans.searchsorted(obj.value, side='right') - 1 # pytz assumes fold == 1, dateutil fold == 0 # adjust only if necessary - if ((typ == 'pytz' and fold == 0) or - (typ == 'dateutil' and fold == 1)): + if (typ == 'pytz' and fold == 0 or + typ == 'dateutil' and fold == 1): pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, fold) obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) From 3ca1fc379835ea45fcb72de28236b2237aab1ea6 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 7 Feb 2020 11:07:56 +0300 Subject: [PATCH 066/128] combine if statements in Timestamp constructor --- pandas/_libs/tslibs/timestamps.pyx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 07ab35f6c7882..9010774a01ef7 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -397,10 +397,9 @@ class Timestamp(_Timestamp): tz, tzinfo = tzinfo, None if getattr(ts_input, 'fold', None) is not None: - if fold is not None: - if ts_input.fold != fold: - raise ValueError("Cannot pass datetime or Timestamp with fold " - "attribute no matching passed fold argument.") + if fold is not None and ts_input.fold != fold: + raise ValueError("Cannot pass datetime or Timestamp with fold " + "attribute no matching passed fold argument.") else: fold = ts_input.fold From edde4451b76dd493398f5d5b075c7a9765f11eda Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 10 Feb 2020 09:15:07 +0300 Subject: [PATCH 067/128] CLN: replace "typ == or typ ==" with "typ in [...]" --- pandas/_libs/tslibs/conversion.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ab786a27a4aa0..25221cec9240c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -374,7 +374,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, else: trans, deltas, typ = get_dst_info(tz) - if typ == 'pytz' or typ == 'dateutil': + if typ in ['pytz', 'dateutil']: pos = trans.searchsorted(obj.value, side='right') - 1 # pytz assumes fold == 1, dateutil fold == 0 # adjust only if necessary @@ -428,7 +428,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, else: trans, deltas, typ = get_dst_info(tz) - if typ == 'pytz' or typ == 'dateutil': + if typ in ['pytz', 'dateutil']: pos = trans.searchsorted(obj.value, side='right') - 1 fold = _infer_tsobject_fold(obj, trans, deltas, pos) @@ -608,7 +608,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): # static/fixed tzinfo; in this case we know len(deltas) == 1 # This can come back with `typ` of either "fixed" or None dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) - elif typ == 'pytz' or typ == 'dateutil': + elif typ in ['pytz', 'dateutil']: pos = trans.searchsorted(obj.value, side='right') - 1 if typ == 'pytz': tz = tz._tzinfos[tz._transition_info[pos]] From d5925af78950e4e664187b9055da1d57ddaca684 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 10 Feb 2020 09:17:48 +0300 Subject: [PATCH 068/128] REFACT: move fold assignment to cdef --- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 25221cec9240c..a2c60541cab50 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -410,6 +410,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt + bint fold = 0 value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -419,8 +420,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, check_overflows(obj) return obj - # Can infer fold from offset-adjusted obj.value - fold = 0 + # Infer fold from offset-adjusted obj.value if is_utc(tz) or is_tzlocal(tz): # TODO: think on how we can infer fold for local Timezone # and adjust value for fold From b128cde23cad84669910dda7b75102b7943084b7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 10 Feb 2020 10:17:13 +0300 Subject: [PATCH 069/128] DOC: add more note and examples for Timestamp.fold --- doc/source/user_guide/timeseries.rst | 11 +++++++++++ pandas/_libs/tslibs/timestamps.pyx | 10 +++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 3fdab0fd26643..4acbb4953b13c 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2220,6 +2220,17 @@ you can use the ``tz_convert`` method. rng_pytz.tz_convert('US/Eastern') +.. versionadded:: 1.1.0 + +For ambiguous times, pandas supports explicitly specifying the fold argument. +Due to daylight saving time, one wall clock time can occur twice when shifting +from summer to winter time; fold describes whether the datetime-like corresponds +to the first (0) or the second time (1) the wall clock hits the ambiguous time. + +.. ipython:: python + + pd.Timestamp("2019-10-27 01:30:00", tz='Europe/London', fold=1) + .. note:: When using ``pytz`` time zones, :class:`DatetimeIndex` will construct a different diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9010774a01ef7..c359615ce9aab 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -195,12 +195,14 @@ class Timestamp(_Timestamp): nanosecond : int, optional, default 0 .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None - fold : int, default is None + fold : {0, 1}, default None Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time + .. versionadded:: 1.1.0 + Notes ----- There are essentially three calling conventions for the constructor. The @@ -230,6 +232,12 @@ class Timestamp(_Timestamp): >>> pd.Timestamp(1513393355, unit='s', tz='US/Pacific') Timestamp('2017-12-15 19:02:35-0800', tz='US/Pacific') + This converts a datetime-like string representing an ambiguous time + in a particular timezone with fold explicitly supplied: + + >>> pd.Timestamp('2019-10-27 01:30:00', tz='Europe/London', fold=1) + Timestamp('2019-10-27 01:30:00+0000', tz='Europe/London') + Using the other two forms that mimic the API for ``datetime.datetime``: >>> pd.Timestamp(2017, 1, 1, 12) From a673b6539da45f3747cccee47b857a9504d8c8a0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 10 Feb 2020 10:23:15 +0300 Subject: [PATCH 070/128] add valid fold value check to Timestamp constructor --- pandas/_libs/tslibs/timestamps.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c359615ce9aab..3c878b4c4207a 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -411,6 +411,10 @@ class Timestamp(_Timestamp): else: fold = ts_input.fold + if fold is not None and fold not in [0, 1]: + raise ValueError("Valid values for the fold argument are None, 0, " + "or 1.") + # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed # checking verbosely, because cython doesn't optimize From 276fad7c3b46861df09d75a7ef8ddc333eb72831 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 10 Feb 2020 10:29:23 +0300 Subject: [PATCH 071/128] REFACT: move fold default from _TSObject cinit to explicit Move fold default assignment out of _TSObject __cinit__ method and assign it explicitly whenever we need to infer fold --- pandas/_libs/tslibs/conversion.pyx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index a2c60541cab50..dba8e551f6f87 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -216,11 +216,7 @@ cdef class _TSObject: # npy_datetimestruct dts # npy_datetimestruct # int64_t value # numpy dt64 # object tzinfo - # bint fold, default 0 - - # Set fold to 0 by default - def __cinit__(self): - self.fold = 0 + # bint fold @property def value(self): @@ -366,6 +362,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 + obj.fold = 0 if tz is not None: if is_utc(tz) or is_tzlocal(tz): # TODO: think on how we can infer fold for local Timezone @@ -591,6 +588,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): assert obj.tzinfo is None + obj.fold = 0 if is_utc(tz): pass elif obj.value == NPY_NAT: From 5540de1bdc5f844e8c0455498add413f48afcbfd Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 11 Feb 2020 16:13:56 +0300 Subject: [PATCH 072/128] DOC: fix typo in error message --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3c878b4c4207a..add34022af026 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -407,7 +407,7 @@ class Timestamp(_Timestamp): if getattr(ts_input, 'fold', None) is not None: if fold is not None and ts_input.fold != fold: raise ValueError("Cannot pass datetime or Timestamp with fold " - "attribute no matching passed fold argument.") + "attribute not matching passed fold argument.") else: fold = ts_input.fold From 30eef0130447e8b163b76a7ae40e11fccbe822a5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 11 Feb 2020 16:58:31 +0300 Subject: [PATCH 073/128] TST: fix the rest of typo --- pandas/tests/indexes/datetimes/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 4adcbffb9d1cd..96400afb1d8e0 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -974,7 +974,7 @@ def test_timestamp_constructor_fold_conflict(tz, ts_input, fold): # Check that we raise on fold conflict msg = ( "Cannot pass datetime or Timestamp with fold " - "attribute no matching passed fold argument." + "attribute not matching passed fold argument." ) with pytest.raises(ValueError, match=msg): Timestamp(ts_input, tz=tz, fold=fold) From 68b05fc6cc2b7a794ff1ec98b4177bb5eb3a851d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 11 Feb 2020 19:12:45 +0300 Subject: [PATCH 074/128] change raise bahvior to allow overriding naive datetime --- pandas/_libs/tslibs/timestamps.pyx | 10 ++++++---- .../indexes/datetimes/test_constructors.py | 18 ++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index add34022af026..d733e9287d031 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -405,10 +405,12 @@ class Timestamp(_Timestamp): tz, tzinfo = tzinfo, None if getattr(ts_input, 'fold', None) is not None: - if fold is not None and ts_input.fold != fold: - raise ValueError("Cannot pass datetime or Timestamp with fold " - "attribute not matching passed fold argument.") - else: + if (fold is not None and ts_input.fold != fold + and ts_input.tzinfo is not None): + raise ValueError("Cannot pass timezone-aware datetime or " + "Timestamp with fold attribute not matching " + "passed fold argument.") + elif fold is None: fold = ts_input.fold if fold is not None and fold not in [0, 1]: diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 96400afb1d8e0..80e93a868c087 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -961,23 +961,21 @@ def test_pass_datetimeindex_to_index(self): tm.assert_numpy_array_equal(idx.values, expected.values) -@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @pytest.mark.parametrize( - "ts_input,fold", - [ - (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 1), - (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), 0), - ], + "tz", [pytz.timezone("Europe/London"), dateutil.tz.gettz("Europe/London")] ) -def test_timestamp_constructor_fold_conflict(tz, ts_input, fold): +@pytest.mark.parametrize("fold_dt,fold_ts", [(0, 1), (1, 0)]) +def test_timestamp_constructor_fold_conflict(tz, fold_dt, fold_ts): # Test for #25057 # Check that we raise on fold conflict + dt = datetime(2019, 10, 27, 1, 30, 0, 0, fold=fold_dt, tzinfo=tz) msg = ( - "Cannot pass datetime or Timestamp with fold " - "attribute not matching passed fold argument." + "Cannot pass timezone-aware datetime or " + "Timestamp with fold attribute not matching " + "passed fold argument." ) with pytest.raises(ValueError, match=msg): - Timestamp(ts_input, tz=tz, fold=fold) + Timestamp(ts_input=dt, tz=tz, fold=fold_ts) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) From d67ec4f3c164c24d30a1a171463fd64afdfc0ed1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 12 Feb 2020 09:56:30 +0300 Subject: [PATCH 075/128] DOC: update localize_tso docstring --- pandas/_libs/tslibs/conversion.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index dba8e551f6f87..3656df7d68a0d 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -559,7 +559,9 @@ cdef inline check_overflows(_TSObject obj): cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): """ Given the UTC nanosecond timestamp in obj.value, find the wall-clock - representation of that timestamp in the given timezone. + representation of that timestamp in the given timezone. Attempt to shift + over the daylight saving time transition point according to the fold + argument. Parameters ---------- @@ -577,7 +579,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): Notes ----- - Sets obj.tzinfo inplace, alters obj.dts inplace. + Sets obj.tzinfo inplace, alters obj.dts inplace, alters obj.value inplace. """ cdef: ndarray[int64_t] trans From 3262085ef0031a08f20c49c8a495581d1b0bc337 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 10:33:47 +0300 Subject: [PATCH 076/128] change override default to raise --- pandas/_libs/tslibs/timestamps.pyx | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d733e9287d031..2c3eb47dba057 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -11,7 +11,7 @@ from cpython.datetime cimport (datetime, PyDateTime_IMPORT from pandas._libs.tslibs.util cimport ( - is_integer_object, is_offset_object) + is_datetime64_object, is_float_object, is_integer_object, is_offset_object) from pandas._libs.tslibs.c_timestamp cimport _Timestamp cimport pandas._libs.tslibs.ccalendar as ccalendar @@ -404,14 +404,17 @@ class Timestamp(_Timestamp): # User passed tzinfo instead of tz; avoid silently ignoring tz, tzinfo = tzinfo, None - if getattr(ts_input, 'fold', None) is not None: - if (fold is not None and ts_input.fold != fold - and ts_input.tzinfo is not None): - raise ValueError("Cannot pass timezone-aware datetime or " - "Timestamp with fold attribute not matching " - "passed fold argument.") - elif fold is None: - fold = ts_input.fold + # Allow fold only for unambiguous input + if (fold is not None and ts_input is not _no_input and + (is_integer_object(ts_input) or is_float_object(ts_input) or + is_datetime64_object(ts_input) or isinstance(ts_input, str) + or getattr(ts_input, 'tzinfo', None) is not None + )): + raise ValueError( + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." + ) if fold is not None and fold not in [0, 1]: raise ValueError("Valid values for the fold argument are None, 0, " From 4790b762d089c2dbfb8d7d8c1666938b5dca1010 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 14:47:57 +0300 Subject: [PATCH 077/128] finish transition to new behavior --- doc/source/user_guide/timeseries.rst | 7 ++- doc/source/whatsnew/v1.1.0.rst | 9 ++-- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 52 +++++++------------ pandas/_libs/tslibs/timestamps.pyx | 13 ++--- .../indexes/datetimes/test_constructors.py | 30 +++++------ 6 files changed, 49 insertions(+), 64 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 4acbb4953b13c..9c4ca630defd0 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2226,10 +2226,15 @@ For ambiguous times, pandas supports explicitly specifying the fold argument. Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. +Fold is supported only for constructing from naive datetime or :class:`Timestamp` +or for constructing from components (see below). .. ipython:: python - pd.Timestamp("2019-10-27 01:30:00", tz='Europe/London', fold=1) + pd.Timestamp(datetime(2019, 10, 27, 1, 30, 0, 0), tz='dateutil/Europe/London', + fold=0) + pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, + tz='dateutil/Europe/London', fold=1) .. note:: diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5773bbb9c4407..e2378a0adf63b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -47,13 +47,14 @@ For example: .. ipython:: python - ts = Timestamp("2019-10-27 01:30:00+00:00") - ts.fold + ts = Timestamp("2019-10-27 01:30:00+00:00") + ts.fold .. ipython:: python - ts = Timestamp("2019-10-27 01:30:00", fold=1, tz="Europe/London") - ts + ts = Timestamp(datetime(2019, 10, 27, 1, 30, 0, 0), + tz="dateutil/Europe/London", fold=1) + ts .. _whatsnew_110.enhancements.other: diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index ed933fd76cf89..51db4a8e2e3c8 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -20,7 +20,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, int32_t nanos=*, bint fold=*) cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, - int32_t nanos=*, bint fold=*) + int32_t nanos=*) cdef int64_t get_datetime64_nanos(object val) except? -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 3656df7d68a0d..49e8f9802d0d8 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -226,7 +226,7 @@ cdef class _TSObject: cdef convert_to_tsobject(object ts, object tz, object unit, bint dayfirst, bint yearfirst, int32_t nanos=0, - bint fold=0): + bint fold=1): """ Extract datetime and int64 from any of: - np.int64 (with unit providing a possible modifier) @@ -248,9 +248,10 @@ cdef convert_to_tsobject(object ts, object tz, object unit, tz = maybe_get_tz(tz) obj = _TSObject() + obj.fold = fold if isinstance(ts, str): - return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst, fold) + return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) if ts is None or ts is NaT: obj.value = NPY_NAT @@ -278,10 +279,12 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = ts dt64_to_dtstruct(ts, &obj.dts) elif PyDateTime_Check(ts): - return convert_datetime_to_tsobject(ts, tz, nanos, fold) + ts = ts.replace(fold=obj.fold) + return convert_datetime_to_tsobject(ts, tz, nanos) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) + ts = ts.replace(fold=obj.fold) return convert_datetime_to_tsobject(ts, tz) elif getattr(ts, '_typ', None) == 'period': raise ValueError("Cannot convert Period to Timestamp " @@ -291,7 +294,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, f'Timestamp') if tz is not None: - localize_tso(obj, tz, fold) + localize_tso(obj, tz) if obj.value != NPY_NAT: # check_overflows needs to run after localize_tso @@ -301,7 +304,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit, cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, - int32_t nanos=0, bint fold=0): + int32_t nanos=0): """ Convert a datetime (or Timestamp) input `ts`, along with optional timezone object `tz` to a _TSObject. @@ -317,11 +320,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, timezone for the timezone-aware output nanos : int32_t, default is 0 nanoseconds supplement the precision of the datetime input ts - fold : bint, default is 0 - Due to daylight saving time, one wall clock time can occur twice - when shifting from summer to winter time; fold describes whether the - datetime-like corresponds to the first (0) or the second time (1) - the wall clock hits the ambiguous time Returns ------- @@ -330,6 +328,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, cdef: _TSObject obj = _TSObject() + obj.fold = ts.fold if tz is not None: tz = maybe_get_tz(tz) @@ -362,7 +361,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 - obj.fold = 0 if tz is not None: if is_utc(tz) or is_tzlocal(tz): # TODO: think on how we can infer fold for local Timezone @@ -375,10 +373,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, pos = trans.searchsorted(obj.value, side='right') - 1 # pytz assumes fold == 1, dateutil fold == 0 # adjust only if necessary - if (typ == 'pytz' and fold == 0 or - typ == 'dateutil' and fold == 1): + if (typ == 'pytz' and obj.fold == 0 or + typ == 'dateutil' and obj.fold == 1): pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, - fold) + obj.fold) obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) check_dts_bounds(&obj.dts) @@ -407,7 +405,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt - bint fold = 0 + bint fold value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -432,15 +430,15 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo) + obj.dts.us, obj.tzinfo, fold=fold) obj = convert_datetime_to_tsobject( - dt, tz, nanos=obj.dts.ps // 1000, fold=fold) + dt, tz, nanos=obj.dts.ps // 1000) return obj cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, bint dayfirst=False, - bint yearfirst=False, bint fold=0): + bint yearfirst=False): """ Convert a string input `ts`, along with optional timezone object`tz` to a _TSObject. @@ -460,11 +458,6 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, yearfirst : bool, default False When parsing an ambiguous date string, interpret e.g. "01/05/09" as "May 9, 2001", as opposed to the default "Jan 5, 2009" - fold : bint, default is 0 - Due to daylight saving time, one wall clock time can occur twice - when shifting from summer to winter time; fold describes whether the - datetime-like corresponds to the first (0) or the second time (1) - the wall clock hits the ambiguous time Returns ------- @@ -507,7 +500,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, if tz is not None: # shift for localize_tso ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, - ambiguous=not fold)[0] + ambiguous='raise')[0] except OutOfBoundsDatetime: # GH#19382 for just-barely-OutOfBounds falling back to dateutil @@ -556,7 +549,7 @@ cdef inline check_overflows(_TSObject obj): # ---------------------------------------------------------------------- # Localization -cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): +cdef inline void localize_tso(_TSObject obj, tzinfo tz): """ Given the UTC nanosecond timestamp in obj.value, find the wall-clock representation of that timestamp in the given timezone. Attempt to shift @@ -567,11 +560,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): ---------- obj : _TSObject tz : tzinfo - fold : bint - Due to daylight saving time, one wall clock time can occur twice - when shifting from summer to winter time; fold describes whether the - datetime-like corresponds to the first (0) or the second time (1) - the wall clock hits the ambiguous time Returns ------- @@ -590,7 +578,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): assert obj.tzinfo is None - obj.fold = 0 if is_utc(tz): pass elif obj.value == NPY_NAT: @@ -612,9 +599,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz, bint fold): pos = trans.searchsorted(obj.value, side='right') - 1 if typ == 'pytz': tz = tz._tzinfos[tz._transition_info[pos]] - # Adjust value if fold was supplied - if fold == 1: - pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, fold) dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2c3eb47dba057..bb15218e5bc84 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -232,12 +232,6 @@ class Timestamp(_Timestamp): >>> pd.Timestamp(1513393355, unit='s', tz='US/Pacific') Timestamp('2017-12-15 19:02:35-0800', tz='US/Pacific') - This converts a datetime-like string representing an ambiguous time - in a particular timezone with fold explicitly supplied: - - >>> pd.Timestamp('2019-10-27 01:30:00', tz='Europe/London', fold=1) - Timestamp('2019-10-27 01:30:00+0000', tz='Europe/London') - Using the other two forms that mimic the API for ``datetime.datetime``: >>> pd.Timestamp(2017, 1, 1, 12) @@ -416,6 +410,9 @@ class Timestamp(_Timestamp): "Pass naive datetime-like or build Timestamp from components." ) + if getattr(ts_input, 'fold', None) is not None and fold is None: + fold = getattr(ts_input, 'fold', None) + if fold is not None and fold not in [0, 1]: raise ValueError("Valid values for the fold argument are None, 0, " "or 1.") @@ -460,7 +457,7 @@ class Timestamp(_Timestamp): "the tz parameter. Use tz_convert instead.") ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, - fold or 0) + fold) if ts.value == NPY_NAT: return NaT @@ -1010,7 +1007,7 @@ default 'raise' 'fold': fold} ts_input = datetime(**kwargs) - ts = convert_datetime_to_tsobject(ts_input, _tzinfo, nanos=0, fold=fold) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo, nanos=0) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: check_dts_bounds(&dts) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 80e93a868c087..dd0210554bd6b 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from functools import partial from operator import attrgetter @@ -962,20 +962,23 @@ def test_pass_datetimeindex_to_index(self): @pytest.mark.parametrize( - "tz", [pytz.timezone("Europe/London"), dateutil.tz.gettz("Europe/London")] + "ts_input,fold", + [ + (1572136200000000000, 0), + ("2019-10-27 01:30:00+01:00", 0), + (datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), 0) + ] ) -@pytest.mark.parametrize("fold_dt,fold_ts", [(0, 1), (1, 0)]) -def test_timestamp_constructor_fold_conflict(tz, fold_dt, fold_ts): +def test_timestamp_constructor_fold_conflict(ts_input, fold): # Test for #25057 # Check that we raise on fold conflict - dt = datetime(2019, 10, 27, 1, 30, 0, 0, fold=fold_dt, tzinfo=tz) msg = ( - "Cannot pass timezone-aware datetime or " - "Timestamp with fold attribute not matching " - "passed fold argument." + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." ) with pytest.raises(ValueError, match=msg): - Timestamp(ts_input=dt, tz=tz, fold=fold_ts) + Timestamp(ts_input=ts_input, fold=fold) @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) @@ -1015,13 +1018,8 @@ def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): @pytest.mark.parametrize( "ts_input,fold,value_out", [ - (1572136200000000000, 1, 1572139800000000000), - (1572139800000000000, 1, 1572139800000000000), - ("2019-10-27 01:30:00", 0, 1572136200000000000), - ("2019-10-27 01:30:00", 1, 1572139800000000000), - (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), 0, 1572136200000000000), - (datetime(2019, 10, 27, 1, 30, 0, 0, fold=0), None, 1572136200000000000), - (datetime(2019, 10, 27, 1, 30, 0, 0, fold=1), None, 1572139800000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 0, 1572136200000000000), + (datetime(2019, 10, 27, 1, 30, 0, 0), 1, 1572139800000000000), ], ) def test_timestamp_constructor_adjust_value_for_fold(tz, ts_input, fold, value_out): From 353bd8743b01b9a9d88b70f627f3e3631e012d2b Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 14:50:40 +0300 Subject: [PATCH 078/128] DOC: revert localize_tso docstring --- pandas/_libs/tslibs/conversion.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 49e8f9802d0d8..205596f74bb1f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -552,9 +552,7 @@ cdef inline check_overflows(_TSObject obj): cdef inline void localize_tso(_TSObject obj, tzinfo tz): """ Given the UTC nanosecond timestamp in obj.value, find the wall-clock - representation of that timestamp in the given timezone. Attempt to shift - over the daylight saving time transition point according to the fold - argument. + representation of that timestamp in the given timezone. Parameters ---------- From 94e9e65044ae0b17bcabe55f9445b442ccd72df6 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 14:54:12 +0300 Subject: [PATCH 079/128] CLN: remove unnecessary change --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index bb15218e5bc84..dac9679919e87 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1007,7 +1007,7 @@ default 'raise' 'fold': fold} ts_input = datetime(**kwargs) - ts = convert_datetime_to_tsobject(ts_input, _tzinfo, nanos=0) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: check_dts_bounds(&dts) From a69833a3292e98b80f2b2c9075b2b8c99cf0761f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 15:00:53 +0300 Subject: [PATCH 080/128] restore pytz ambiguous test --- pandas/tests/scalar/timestamp/test_timezones.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 912904c62fe95..cfa7da810ada1 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -327,6 +327,9 @@ def test_timestamp_constructor_near_dst_boundary(self): expected = Timestamp("2015-10-25 01:00").tz_localize(tz) assert result == expected + with pytest.raises(pytz.AmbiguousTimeError): + Timestamp("2015-10-25 02:00", tz=tz) + result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") expected = Timestamp("2017-03-26 01:00").tz_localize("Europe/Paris") assert result == expected From e58ecb9bbc1fa5c9f00c231c7e404fac1182ed3f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 15:22:30 +0300 Subject: [PATCH 081/128] CLN: lint the tests --- pandas/tests/indexes/datetimes/test_constructors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index dd0210554bd6b..48ca2f9f9d89d 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -966,8 +966,8 @@ def test_pass_datetimeindex_to_index(self): [ (1572136200000000000, 0), ("2019-10-27 01:30:00+01:00", 0), - (datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), 0) - ] + (datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), 0), + ], ) def test_timestamp_constructor_fold_conflict(ts_input, fold): # Test for #25057 From e1ffa8dabea1ec4ee624591f5dad2f21485ed2ea Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 15:43:56 +0300 Subject: [PATCH 082/128] set fold in Timestamp.replace after pytz resets it --- pandas/_libs/tslibs/timestamps.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index dac9679919e87..06d0606f9c57c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -999,6 +999,8 @@ default 'raise' dts.hour, dts.min, dts.sec, dts.us), is_dst=not bool(fold)) + # set fold after pytz resets it to 0 (GH 25057) + ts_input = ts_input.replace(fold=fold) _tzinfo = ts_input.tzinfo else: kwargs = {'year': dts.year, 'month': dts.month, 'day': dts.day, From c57ec65debd19154e9d7045c9fdebead9b1fc9fe Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 16:39:18 +0300 Subject: [PATCH 083/128] fix datetime call in docs --- doc/source/user_guide/timeseries.rst | 6 +++--- doc/source/whatsnew/v1.1.0.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 9c4ca630defd0..3064301cc8bc5 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2231,10 +2231,10 @@ or for constructing from components (see below). .. ipython:: python - pd.Timestamp(datetime(2019, 10, 27, 1, 30, 0, 0), tz='dateutil/Europe/London', - fold=0) + pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), + tz='dateutil/Europe/London', fold=0) pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, - tz='dateutil/Europe/London', fold=1) + tz='dateutil/Europe/London', fold=1) .. note:: diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e2378a0adf63b..295c847a5e6bc 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -52,7 +52,7 @@ For example: .. ipython:: python - ts = Timestamp(datetime(2019, 10, 27, 1, 30, 0, 0), + ts = Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), tz="dateutil/Europe/London", fold=1) ts From f3f869052c8097ab08039d52c90f7e155a2f0166 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 17:12:40 +0300 Subject: [PATCH 084/128] fix doc error --- doc/source/whatsnew/v1.1.0.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 295c847a5e6bc..453ab14c002ee 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -47,13 +47,13 @@ For example: .. ipython:: python - ts = Timestamp("2019-10-27 01:30:00+00:00") + ts = pd.Timestamp("2019-10-27 01:30:00+00:00") ts.fold .. ipython:: python - ts = Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), - tz="dateutil/Europe/London", fold=1) + ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, + tz="dateutil/Europe/London", fold=1) ts .. _whatsnew_110.enhancements.other: From 476c4a481d25101fc3f3b0f496518596e1b6c3e7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 18:00:20 +0300 Subject: [PATCH 085/128] for naive datetime, set default to 0 --- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 205596f74bb1f..ba635ce703491 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -226,7 +226,7 @@ cdef class _TSObject: cdef convert_to_tsobject(object ts, object tz, object unit, bint dayfirst, bint yearfirst, int32_t nanos=0, - bint fold=1): + bint fold=0): """ Extract datetime and int64 from any of: - np.int64 (with unit providing a possible modifier) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 06d0606f9c57c..7d357e8677f8f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -457,7 +457,7 @@ class Timestamp(_Timestamp): "the tz parameter. Use tz_convert instead.") ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, - fold) + fold or 0) if ts.value == NPY_NAT: return NaT From afaeb882f38e94c7c34669a51e283b71db477465 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 19:07:13 +0300 Subject: [PATCH 086/128] TST: expand tests --- pandas/tests/indexes/datetimes/test_constructors.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 48ca2f9f9d89d..ebaa21de0b2c0 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -961,12 +961,15 @@ def test_pass_datetimeindex_to_index(self): tm.assert_numpy_array_equal(idx.values, expected.values) +@pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize( - "ts_input,fold", + "ts_input", [ - (1572136200000000000, 0), - ("2019-10-27 01:30:00+01:00", 0), - (datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), 0), + 1572136200000000000, + 1572136200000000000.0, + np.datetime64(1572136200000000000, "ns"), + "2019-10-27 01:30:00+01:00", + datetime(2019, 10, 27, 0, 30, 0, 0, tzinfo=timezone.utc), ], ) def test_timestamp_constructor_fold_conflict(ts_input, fold): @@ -981,7 +984,7 @@ def test_timestamp_constructor_fold_conflict(ts_input, fold): Timestamp(ts_input=ts_input, fold=fold) -@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London", None]) @pytest.mark.parametrize("fold", [0, 1]) def test_timestamp_constructor_retain_fold(tz, fold): # Test for #25057 From 4a33d36a241f01ea4b9d588780a75eef36f2d3fd Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 19:36:06 +0300 Subject: [PATCH 087/128] REFACT: bundle fold in ts_input --- pandas/_libs/tslibs/conversion.pxd | 2 +- pandas/_libs/tslibs/conversion.pyx | 6 +----- pandas/_libs/tslibs/timestamps.pyx | 13 ++++++------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 51db4a8e2e3c8..bb20296e24587 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -17,7 +17,7 @@ cdef class _TSObject: cdef convert_to_tsobject(object ts, object tz, object unit, bint dayfirst, bint yearfirst, - int32_t nanos=*, bint fold=*) + int32_t nanos=*) cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, int32_t nanos=*) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ba635ce703491..05521f8f047a9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -225,8 +225,7 @@ cdef class _TSObject: cdef convert_to_tsobject(object ts, object tz, object unit, - bint dayfirst, bint yearfirst, int32_t nanos=0, - bint fold=0): + bint dayfirst, bint yearfirst, int32_t nanos=0): """ Extract datetime and int64 from any of: - np.int64 (with unit providing a possible modifier) @@ -248,7 +247,6 @@ cdef convert_to_tsobject(object ts, object tz, object unit, tz = maybe_get_tz(tz) obj = _TSObject() - obj.fold = fold if isinstance(ts, str): return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) @@ -279,12 +277,10 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = ts dt64_to_dtstruct(ts, &obj.dts) elif PyDateTime_Check(ts): - ts = ts.replace(fold=obj.fold) return convert_datetime_to_tsobject(ts, tz, nanos) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) - ts = ts.replace(fold=obj.fold) return convert_datetime_to_tsobject(ts, tz) elif getattr(ts, '_typ', None) == 'period': raise ValueError("Cannot convert Period to Timestamp " diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7d357e8677f8f..7903b2ba439bf 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -410,13 +410,13 @@ class Timestamp(_Timestamp): "Pass naive datetime-like or build Timestamp from components." ) - if getattr(ts_input, 'fold', None) is not None and fold is None: - fold = getattr(ts_input, 'fold', None) - if fold is not None and fold not in [0, 1]: raise ValueError("Valid values for the fold argument are None, 0, " "or 1.") + if getattr(ts_input, 'fold', None) is not None and fold is not None: + ts_input = ts_input.replace(fold=fold) + # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed # checking verbosely, because cython doesn't optimize @@ -441,13 +441,13 @@ class Timestamp(_Timestamp): # User passed keyword arguments. ts_input = datetime(year, month, day, hour or 0, minute or 0, second or 0, - microsecond or 0) + microsecond or 0, fold=fold or 0) elif is_integer_object(freq): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, # microsecond[, nanosecond[, tzinfo]]]]]]) ts_input = datetime(ts_input, freq, tz, unit or 0, - year or 0, month or 0, day or 0) + year or 0, month or 0, day or 0, fold=fold or 0) nanosecond = hour tz = minute freq = None @@ -456,8 +456,7 @@ class Timestamp(_Timestamp): raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. Use tz_convert instead.") - ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0, - fold or 0) + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) if ts.value == NPY_NAT: return NaT From 82ed93ccf8d875763363f05d5b991cc08c739b9f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 19:47:54 +0300 Subject: [PATCH 088/128] pass fold to func_create candidates in tslib.pyx --- pandas/_libs/tslib.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5de6fc8976a35..94aac48702a7f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -52,13 +52,14 @@ cdef inline object create_datetime_from_ts( object tz, object freq, bint fold): """ convenience routine to construct a datetime.datetime from its parts """ return datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) + dts.min, dts.sec, dts.us, tz, fold=fold) cdef inline object create_date_from_ts( int64_t value, npy_datetimestruct dts, object tz, object freq, bint fold): """ convenience routine to construct a datetime.date from its parts """ + # GH 25057 add fold argument to match other func_create signatures return date(dts.year, dts.month, dts.day) @@ -66,7 +67,7 @@ cdef inline object create_time_from_ts( int64_t value, npy_datetimestruct dts, object tz, object freq, bint fold): """ convenience routine to construct a datetime.time from its parts """ - return time(dts.hour, dts.min, dts.sec, dts.us, tz) + return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold) @cython.wraparound(False) From d9aea0985efc687f4cdace07c278c1bcd1351686 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 20:46:09 +0300 Subject: [PATCH 089/128] remove dateutil adjustment from convert_datetime_to_tsobject --- pandas/_libs/tslibs/conversion.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 05521f8f047a9..7e17b0f8b8ba8 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -367,10 +367,8 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if typ in ['pytz', 'dateutil']: pos = trans.searchsorted(obj.value, side='right') - 1 - # pytz assumes fold == 1, dateutil fold == 0 - # adjust only if necessary - if (typ == 'pytz' and obj.fold == 0 or - typ == 'dateutil' and obj.fold == 1): + # if ambiguous, pytz needs adjustment not in a fold + if typ == 'pytz' and obj.fold == 0: pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, obj.fold) obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) From d68efb67f9bdc0bfc4de275a6bca357920edaae1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 21:04:47 +0300 Subject: [PATCH 090/128] move relevant parts of adjust function where called and drop --- pandas/_libs/tslibs/conversion.pyx | 54 +++--------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7e17b0f8b8ba8..35e98399091f3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -369,8 +369,11 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, pos = trans.searchsorted(obj.value, side='right') - 1 # if ambiguous, pytz needs adjustment not in a fold if typ == 'pytz' and obj.fold == 0: - pos = _adjust_tsobject_for_fold(obj, trans, deltas, pos, - obj.fold) + if pos > 0: + fold_delta = deltas[pos - 1] - deltas[pos] + if obj.value - fold_delta < trans[pos]: + obj.value -= fold_delta + pos -= 1 obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) check_dts_bounds(&obj.dts) @@ -603,53 +606,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): obj.tzinfo = tz -cdef inline int32_t _adjust_tsobject_for_fold(_TSObject obj, object trans, - object deltas, int32_t pos, - bint fold): - """ - Adjust _TSObject value for fold is possible. Return updated last offset - transition position in the trans list. - - Parameters - ---------- - obj : _TSObject - trans : object - List of offset transition points in nanoseconds since epoch. - deltas : object - List of offsets corresponding to transition points in trans. - pos : int32_t - Position of the last transition point before taking fold into account. - fold : bint - Due to daylight saving time, one wall clock time can occur twice - when shifting from summer to winter time; fold describes whether the - datetime-like corresponds to the first (0) or the second time (1) - the wall clock hits the ambiguous time - - Returns - ------- - int32_t - Position of the last transition point after taking fold into account. - - Notes - ----- - Alters obj.value inplace. - """ - if fold == 0: - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.value -= fold_delta - pos -= 1 - elif fold == 1: - if pos < len(deltas): - fold_delta = deltas[pos] - deltas[pos + 1] - if obj.value + fold_delta > trans[pos + 1]: - obj.value += fold_delta - pos += 1 - - return pos - - cdef inline bint _infer_tsobject_fold(_TSObject obj, object trans, object deltas, int32_t pos): """ From 9328071aab99974fee134c1ea502302f650dec2f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 14 Feb 2020 21:24:14 +0300 Subject: [PATCH 091/128] CLN: revert localize_tso docstring notes --- pandas/_libs/tslibs/conversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 35e98399091f3..ab4163a003bfc 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -562,7 +562,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): Notes ----- - Sets obj.tzinfo inplace, alters obj.dts inplace, alters obj.value inplace. + Sets obj.tzinfo inplace, alters obj.dts inplace. """ cdef: ndarray[int64_t] trans From 4ecbaf194422c60904d424e50f4be6571e643708 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 08:27:26 +0300 Subject: [PATCH 092/128] REFACT: combine raise conditions in timestamps.pyx --- pandas/_libs/tslibs/timestamps.pyx | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 7903b2ba439bf..01b876497ea05 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -399,20 +399,20 @@ class Timestamp(_Timestamp): tz, tzinfo = tzinfo, None # Allow fold only for unambiguous input - if (fold is not None and ts_input is not _no_input and - (is_integer_object(ts_input) or is_float_object(ts_input) or - is_datetime64_object(ts_input) or isinstance(ts_input, str) - or getattr(ts_input, 'tzinfo', None) is not None - )): - raise ValueError( - "Cannot pass fold with possibly unambiguous input: int, float, " - "numpy.datetime64, str, or timezone-aware datetime-like. " - "Pass naive datetime-like or build Timestamp from components." - ) + if fold is not None: + if fold not in [0, 1]: + raise ValueError("Valid values for the fold argument are None, 0, " + "or 1.") - if fold is not None and fold not in [0, 1]: - raise ValueError("Valid values for the fold argument are None, 0, " - "or 1.") + if (ts_input is not _no_input and (is_integer_object(ts_input) + or is_float_object(ts_input) or + is_datetime64_object(ts_input) or isinstance(ts_input, str) + or getattr(ts_input, 'tzinfo', None) is not None)): + raise ValueError( + "Cannot pass fold with possibly unambiguous input: int, float, " + "numpy.datetime64, str, or timezone-aware datetime-like. " + "Pass naive datetime-like or build Timestamp from components." + ) if getattr(ts_input, 'fold', None) is not None and fold is not None: ts_input = ts_input.replace(fold=fold) From ee90ac74fd9f32bbb502ab7a44ee2d6fc707636f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 08:37:12 +0300 Subject: [PATCH 093/128] CLN: tighten error messages --- pandas/_libs/tslibs/timestamps.pyx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 01b876497ea05..eb6dee6fe6b66 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -401,17 +401,19 @@ class Timestamp(_Timestamp): # Allow fold only for unambiguous input if fold is not None: if fold not in [0, 1]: - raise ValueError("Valid values for the fold argument are None, 0, " - "or 1.") + raise ValueError( + "Valid values for the fold argument are None, 0, or 1." + ) if (ts_input is not _no_input and (is_integer_object(ts_input) or is_float_object(ts_input) or is_datetime64_object(ts_input) or isinstance(ts_input, str) or getattr(ts_input, 'tzinfo', None) is not None)): raise ValueError( - "Cannot pass fold with possibly unambiguous input: int, float, " - "numpy.datetime64, str, or timezone-aware datetime-like. " - "Pass naive datetime-like or build Timestamp from components." + "Cannot pass fold with possibly unambiguous input: int, " + "float, numpy.datetime64, str, or timezone-aware " + "datetime-like. Pass naive datetime-like or build " + "Timestamp from components." ) if getattr(ts_input, 'fold', None) is not None and fold is not None: From 25291e46e226aa0f5d71cbeb9cb4e1ff62d37e3a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 09:14:09 +0300 Subject: [PATCH 094/128] CLN: black the tests --- pandas/_libs/tslibs/conversion.pyx | 35 ++++++------------- pandas/_libs/tslibs/timestamps.pyx | 6 ++++ .../indexes/datetimes/test_constructors.py | 13 +++++-- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index ab4163a003bfc..de382707a2830 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -357,25 +357,6 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, obj.value += nanos obj.dts.ps = nanos * 1000 - if tz is not None: - if is_utc(tz) or is_tzlocal(tz): - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold - pass - else: - trans, deltas, typ = get_dst_info(tz) - - if typ in ['pytz', 'dateutil']: - pos = trans.searchsorted(obj.value, side='right') - 1 - # if ambiguous, pytz needs adjustment not in a fold - if typ == 'pytz' and obj.fold == 0: - if pos > 0: - fold_delta = deltas[pos - 1] - deltas[pos] - if obj.value - fold_delta < trans[pos]: - obj.value -= fold_delta - pos -= 1 - obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) - check_dts_bounds(&obj.dts) check_overflows(obj) return obj @@ -402,7 +383,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt - bint fold + bint fold = 0 value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -420,7 +401,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, else: trans, deltas, typ = get_dst_info(tz) - if typ in ['pytz', 'dateutil']: + if typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 fold = _infer_tsobject_fold(obj, trans, deltas, pos) @@ -590,12 +571,16 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): # static/fixed tzinfo; in this case we know len(deltas) == 1 # This can come back with `typ` of either "fixed" or None dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) - elif typ in ['pytz', 'dateutil']: + elif typ == 'pytz': + # i.e. treat_tz_as_pytz(tz) pos = trans.searchsorted(obj.value, side='right') - 1 - if typ == 'pytz': - tz = tz._tzinfos[tz._transition_info[pos]] + tz = tz._tzinfos[tz._transition_info[pos]] dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - + elif typ == 'dateutil': + # i.e. treat_tz_as_dateutil(tz) + pos = trans.searchsorted(obj.value, side='right') - 1 + dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + # dateutil supports fold, so we infer fold from value obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) else: # Note: as of 2018-07-17 all tzinfo objects that are _not_ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index eb6dee6fe6b66..3c42960ad1040 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -416,6 +416,12 @@ class Timestamp(_Timestamp): "Timestamp from components." ) + if tz is not None and treat_tz_as_pytz(tz): + raise ValueError( + "pytz timezones do not support fold. Please use dateutil " + "timezones." + ) + if getattr(ts_input, 'fold', None) is not None and fold is not None: ts_input = ts_input.replace(fold=fold) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index ebaa21de0b2c0..594b949c2b09e 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -961,6 +961,13 @@ def test_pass_datetimeindex_to_index(self): tm.assert_numpy_array_equal(idx.values, expected.values) +def test_timestamp_constructor_pytz_fold_raise(): + # Test for #25057 + # pytz doesn't support fold. Check that we raise + # if fold is passed with pytz + msg = "pytz timezones do not support fold. Please use dateutil " "timezones." + + @pytest.mark.parametrize("fold", [0, 1]) @pytest.mark.parametrize( "ts_input", @@ -984,7 +991,7 @@ def test_timestamp_constructor_fold_conflict(ts_input, fold): Timestamp(ts_input=ts_input, fold=fold) -@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London", None]) +@pytest.mark.parametrize("tz", ["dateutil/Europe/London", None]) @pytest.mark.parametrize("fold", [0, 1]) def test_timestamp_constructor_retain_fold(tz, fold): # Test for #25057 @@ -995,7 +1002,7 @@ def test_timestamp_constructor_retain_fold(tz, fold): assert result == expected -@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) @pytest.mark.parametrize( "ts_input,fold_out", [ @@ -1017,7 +1024,7 @@ def test_timestamp_constructor_infer_fold_from_value(tz, ts_input, fold_out): assert result == expected -@pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) +@pytest.mark.parametrize("tz", ["dateutil/Europe/London"]) @pytest.mark.parametrize( "ts_input,fold,value_out", [ From 08cc256e5e5e144670004a8398a65d213513884a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 09:37:38 +0300 Subject: [PATCH 095/128] DOC: expand doc in timeseries.rst and make it a note --- doc/source/user_guide/timeseries.rst | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 3064301cc8bc5..97eb89838579f 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2220,21 +2220,25 @@ you can use the ``tz_convert`` method. rng_pytz.tz_convert('US/Eastern') -.. versionadded:: 1.1.0 +.. note:: -For ambiguous times, pandas supports explicitly specifying the fold argument. -Due to daylight saving time, one wall clock time can occur twice when shifting -from summer to winter time; fold describes whether the datetime-like corresponds -to the first (0) or the second time (1) the wall clock hits the ambiguous time. -Fold is supported only for constructing from naive datetime or :class:`Timestamp` -or for constructing from components (see below). + .. versionadded:: 1.1.0 -.. ipython:: python + For ambiguous times, pandas supports explicitly specifying the fold argument. + Due to daylight saving time, one wall clock time can occur twice when shifting + from summer to winter time; fold describes whether the datetime-like corresponds + to the first (0) or the second time (1) the wall clock hits the ambiguous time. + Fold is supported only for constructing from naive datetime or :class:`Timestamp` + or for constructing from components (see below) with dateutil timezones as pytz + timezones do not support fold. When localizing an ambiguous datetime, we + recommend to rely on ``tz_localize`` instead as it gives more control. + + .. ipython:: python - pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), - tz='dateutil/Europe/London', fold=0) - pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, - tz='dateutil/Europe/London', fold=1) + pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), + tz='dateutil/Europe/London', fold=0) + pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, + tz='dateutil/Europe/London', fold=1) .. note:: From 29107203b61849d53e4aecc39e5096c9be33de25 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 09:40:11 +0300 Subject: [PATCH 096/128] TST: fix pytz fold conflict test --- pandas/tests/indexes/datetimes/test_constructors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 594b949c2b09e..217bf2a5efe03 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -966,6 +966,9 @@ def test_timestamp_constructor_pytz_fold_raise(): # pytz doesn't support fold. Check that we raise # if fold is passed with pytz msg = "pytz timezones do not support fold. Please use dateutil " "timezones." + tz = pytz.timezone("Europe/London") + with pytest.raises(ValueError, match=msg): + Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) @pytest.mark.parametrize("fold", [0, 1]) From f6c11da988c03d5b20dd2d6add976be0f6bb98f9 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 09:51:44 +0300 Subject: [PATCH 097/128] DOC: improve formatting in timeseries.rst addition --- doc/source/user_guide/timeseries.rst | 6 +++--- doc/source/whatsnew/v1.1.0.rst | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 97eb89838579f..b16416da9c73d 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2228,10 +2228,10 @@ you can use the ``tz_convert`` method. Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. - Fold is supported only for constructing from naive datetime or :class:`Timestamp` - or for constructing from components (see below) with dateutil timezones as pytz + Fold is supported only for constructing from naive ``datetime.datetime`` or :class:`Timestamp` + or for constructing from components (see below) with ``dateutil`` timezones as ``pytz`` timezones do not support fold. When localizing an ambiguous datetime, we - recommend to rely on ``tz_localize`` instead as it gives more control. + recommend to rely on :meth:`Timestamp.tz_localize` instead as it gives more control. .. ipython:: python diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 453ab14c002ee..96fdfe9608b8c 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -41,7 +41,7 @@ For example: Fold argument support in Timestamp constructor ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:class:`Timestamp: now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). +:class:`Timestamp: now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to `dateutil` timezones as `pytz` doesn't support fold. For example: From 6f16ea5162c48d9ee6703873c97149911882b103 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 10:11:54 +0300 Subject: [PATCH 098/128] DOC: update whatsnew --- doc/source/whatsnew/v1.1.0.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 96fdfe9608b8c..0382abb5f6f09 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -41,7 +41,7 @@ For example: Fold argument support in Timestamp constructor ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:class:`Timestamp: now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to `dateutil` timezones as `pytz` doesn't support fold. +:class:`Timestamp:` now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to `dateutil` timezones as `pytz` doesn't support fold. For example: @@ -56,6 +56,8 @@ For example: tz="dateutil/Europe/London", fold=1) ts +For more, see :ref:`Timezone section ` in the user guide on working with timezones. + .. _whatsnew_110.enhancements.other: Other enhancements From 5024452fa71981ab0dae037ccceb77213f4b4642 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 10:12:17 +0300 Subject: [PATCH 099/128] CLN: fix linting mistakes --- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- pandas/tests/indexes/datetimes/test_constructors.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3c42960ad1040..e3d27221f8a13 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -405,8 +405,8 @@ class Timestamp(_Timestamp): "Valid values for the fold argument are None, 0, or 1." ) - if (ts_input is not _no_input and (is_integer_object(ts_input) - or is_float_object(ts_input) or + if (ts_input is not _no_input and ( + is_integer_object(ts_input) or is_float_object(ts_input) or is_datetime64_object(ts_input) or isinstance(ts_input, str) or getattr(ts_input, 'tzinfo', None) is not None)): raise ValueError( diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 217bf2a5efe03..ec84dc3d5e565 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -965,7 +965,7 @@ def test_timestamp_constructor_pytz_fold_raise(): # Test for #25057 # pytz doesn't support fold. Check that we raise # if fold is passed with pytz - msg = "pytz timezones do not support fold. Please use dateutil " "timezones." + msg = "pytz timezones do not support fold. Please use dateutil timezones." tz = pytz.timezone("Europe/London") with pytest.raises(ValueError, match=msg): Timestamp(datetime(2019, 10, 27, 0, 30, 0, 0), tz=tz, fold=0) From 3e49b7adbc0f3315e441ab68b24f1df663f334c5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 10:24:17 +0300 Subject: [PATCH 100/128] add versionadded where appropriate --- pandas/_libs/tslib.pyx | 1 + pandas/_libs/tslibs/timestamps.pyx | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 94aac48702a7f..a5713be186745 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -89,6 +89,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time + .. versionadded:: 1.1.0 box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' If datetime, convert to datetime.datetime If date, convert to datetime.date diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e3d27221f8a13..db686815176ee 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -200,7 +200,6 @@ class Timestamp(_Timestamp): when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time - .. versionadded:: 1.1.0 Notes From bcf0905036a044d9a5b77667a663e88a6a313925 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 10:31:13 +0300 Subject: [PATCH 101/128] DOC: add references to PEP 495 --- pandas/_libs/tslibs/conversion.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index de382707a2830..60bb34d7a3795 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -394,6 +394,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, return obj # Infer fold from offset-adjusted obj.value + # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute if is_utc(tz) or is_tzlocal(tz): # TODO: think on how we can infer fold for local Timezone # and adjust value for fold @@ -614,6 +615,11 @@ cdef inline bint _infer_tsobject_fold(_TSObject obj, object trans, when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time + + References + ---------- + .. [1] "PEP 495 - Local Time Disambiguation" + https://www.python.org/dev/peps/pep-0495/#the-fold-attribute """ cdef: bint fold = 0 From 9b614ae471f8494da9f2b140ef13ee6fe2be8d1b Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 12:54:30 +0300 Subject: [PATCH 102/128] add local timezone support --- pandas/_libs/tslibs/conversion.pyx | 17 +++++------ pandas/_libs/tslibs/tzconversion.pxd | 2 ++ pandas/_libs/tslibs/tzconversion.pyx | 42 +++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 60bb34d7a3795..5b4ed4264b81c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,7 +39,8 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.tzconversion import ( tz_localize_to_utc, tz_convert_single) -from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc +from pandas._libs.tslibs.tzconversion cimport ( + _tz_convert_tzlocal_utc, _tz_convert_utctsobject_to_local) # ---------------------------------------------------------------------- # Constants @@ -395,10 +396,12 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Infer fold from offset-adjusted obj.value # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute - if is_utc(tz) or is_tzlocal(tz): - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold + if is_utc(tz): pass + elif is_tzlocal(tz): + # Localize _TSObject to local timezone to infer fold + _tz_convert_utctsobject_to_local(obj, tz) + fold = obj.fold else: trans, deltas, typ = get_dst_info(tz) @@ -560,10 +563,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) - dt64_to_dtstruct(local_val, &obj.dts) - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold + # Localize _TSObject to local timezone and infer fold + _tz_convert_utctsobject_to_local(obj, tz) else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 9c86057b0a392..b9e5f8a1415c3 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -1,7 +1,9 @@ from cpython.datetime cimport tzinfo from numpy cimport int64_t +from pandas._libs.tslibs.conversion cimport _TSObject cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) +cdef void _tz_convert_utctsobject_to_local(_TSObject obj, tzinfo tz) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index b368f0fde3edc..4c6dad75f428c 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -17,6 +17,7 @@ from numpy cimport ndarray, int64_t, uint8_t, intp_t cnp.import_array() from pandas._libs.tslibs.ccalendar import DAY_SECONDS, HOUR_SECONDS +from pandas._libs.tslibs.conversion cimport _TSObject from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dt64_to_dtstruct) @@ -24,7 +25,6 @@ from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds from pandas._libs.tslibs.timezones cimport ( get_dst_info, is_tzlocal, is_utc, get_timezone, get_utcoffset) - # TODO: cdef scalar version to call from convert_str_to_tsobject @cython.boundscheck(False) @cython.wraparound(False) @@ -482,6 +482,46 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): return val - delta +cdef void _tz_convert_utctsobject_to_local(_TSObject obj, tzinfo tz): + """ + Localize _TSObject (intermediate type holding future Timestamp data) from + UTC to local timezone. + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + obj : _TSObject + tz : tzinfo + + Returns + ------- + None + + Notes + ----- + Sets obj.tzinfo inplace, alters obj.dts inplace, + alters object.value inplace, alters obj.fold inplace + """ + cdef: + npy_datetimestruct dts + int64_t delta + datetime dt + + dt64_to_dtstruct(obj.value, &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us) + # get_utcoffset (tz.utcoffset under the hood) only makes sense if datetime + # is _wall time_, so if val is a UTC timestamp convert to wall time + dt = dt.replace(tzinfo=tzutc()) + dt = dt.astimezone(tz) + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + + obj.value += delta + obj.fold = dt.fold + dt64_to_dtstruct(obj.value, &obj.dts) + + @cython.boundscheck(False) @cython.wraparound(False) cdef int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, From 2145b05e07a8a8fd2631b4578371cd02f5318493 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 15 Feb 2020 13:24:51 +0300 Subject: [PATCH 103/128] Revert "add local timezone support" --- pandas/_libs/tslibs/conversion.pyx | 17 ++++++----- pandas/_libs/tslibs/tzconversion.pxd | 2 -- pandas/_libs/tslibs/tzconversion.pyx | 42 +--------------------------- 3 files changed, 9 insertions(+), 52 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 5b4ed4264b81c..60bb34d7a3795 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,8 +39,7 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.tzconversion import ( tz_localize_to_utc, tz_convert_single) -from pandas._libs.tslibs.tzconversion cimport ( - _tz_convert_tzlocal_utc, _tz_convert_utctsobject_to_local) +from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc # ---------------------------------------------------------------------- # Constants @@ -396,12 +395,10 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Infer fold from offset-adjusted obj.value # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute - if is_utc(tz): + if is_utc(tz) or is_tzlocal(tz): + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold pass - elif is_tzlocal(tz): - # Localize _TSObject to local timezone to infer fold - _tz_convert_utctsobject_to_local(obj, tz) - fold = obj.fold else: trans, deltas, typ = get_dst_info(tz) @@ -563,8 +560,10 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - # Localize _TSObject to local timezone and infer fold - _tz_convert_utctsobject_to_local(obj, tz) + local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) + dt64_to_dtstruct(local_val, &obj.dts) + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index b9e5f8a1415c3..9c86057b0a392 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -1,9 +1,7 @@ from cpython.datetime cimport tzinfo from numpy cimport int64_t -from pandas._libs.tslibs.conversion cimport _TSObject cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) -cdef void _tz_convert_utctsobject_to_local(_TSObject obj, tzinfo tz) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 4c6dad75f428c..b368f0fde3edc 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -17,7 +17,6 @@ from numpy cimport ndarray, int64_t, uint8_t, intp_t cnp.import_array() from pandas._libs.tslibs.ccalendar import DAY_SECONDS, HOUR_SECONDS -from pandas._libs.tslibs.conversion cimport _TSObject from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dt64_to_dtstruct) @@ -25,6 +24,7 @@ from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds from pandas._libs.tslibs.timezones cimport ( get_dst_info, is_tzlocal, is_utc, get_timezone, get_utcoffset) + # TODO: cdef scalar version to call from convert_str_to_tsobject @cython.boundscheck(False) @cython.wraparound(False) @@ -482,46 +482,6 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): return val - delta -cdef void _tz_convert_utctsobject_to_local(_TSObject obj, tzinfo tz): - """ - Localize _TSObject (intermediate type holding future Timestamp data) from - UTC to local timezone. - - Private, not intended for use outside of tslibs.conversion - - Parameters - ---------- - obj : _TSObject - tz : tzinfo - - Returns - ------- - None - - Notes - ----- - Sets obj.tzinfo inplace, alters obj.dts inplace, - alters object.value inplace, alters obj.fold inplace - """ - cdef: - npy_datetimestruct dts - int64_t delta - datetime dt - - dt64_to_dtstruct(obj.value, &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us) - # get_utcoffset (tz.utcoffset under the hood) only makes sense if datetime - # is _wall time_, so if val is a UTC timestamp convert to wall time - dt = dt.replace(tzinfo=tzutc()) - dt = dt.astimezone(tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - - obj.value += delta - obj.fold = dt.fold - dt64_to_dtstruct(obj.value, &obj.dts) - - @cython.boundscheck(False) @cython.wraparound(False) cdef int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, From 8f82aa1e8cff162c82554024577d0be91fbfd109 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 09:27:30 +0300 Subject: [PATCH 104/128] make fold keyword-only --- doc/source/user_guide/timeseries.rst | 2 +- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/_libs/tslibs/timestamps.pyx | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index b16416da9c73d..54e60834fe4b7 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2224,7 +2224,7 @@ you can use the ``tz_convert`` method. .. versionadded:: 1.1.0 - For ambiguous times, pandas supports explicitly specifying the fold argument. + For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument. Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 6b4a3e0b85015..5067b8d8f40e6 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -41,7 +41,7 @@ For example: Fold argument support in Timestamp constructor ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:class:`Timestamp:` now supports the fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to `dateutil` timezones as `pytz` doesn't support fold. +:class:`Timestamp:` now supports the keyword-only fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to `dateutil` timezones as `pytz` doesn't support fold. For example: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index db686815176ee..195d25f79266c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -195,7 +195,7 @@ class Timestamp(_Timestamp): nanosecond : int, optional, default 0 .. versionadded:: 0.23.0 tzinfo : datetime.tzinfo, optional, default None - fold : {0, 1}, default None + fold : {0, 1}, default None, keyword-only Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) @@ -357,6 +357,7 @@ class Timestamp(_Timestamp): microsecond=None, nanosecond=None, tzinfo=None, + *, fold=None ): # The parameter list folds together legacy parameter names (the first From d39e81141ebeeeffa537f6e3fa2ebb02a58fdccb Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 09:31:07 +0300 Subject: [PATCH 105/128] CLN: prune pytz-specific logic in Timestamp.replace --- pandas/_libs/tslibs/timestamps.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 195d25f79266c..81deda08e6425 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1006,8 +1006,6 @@ default 'raise' dts.hour, dts.min, dts.sec, dts.us), is_dst=not bool(fold)) - # set fold after pytz resets it to 0 (GH 25057) - ts_input = ts_input.replace(fold=fold) _tzinfo = ts_input.tzinfo else: kwargs = {'year': dts.year, 'month': dts.month, 'day': dts.day, From 309168937ad19b1fb6690da59a3ee663d6f30a83 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 09:34:29 +0300 Subject: [PATCH 106/128] switch from getattr to hasattr in the constructor --- pandas/_libs/tslibs/timestamps.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 81deda08e6425..e7a760c215116 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -422,7 +422,7 @@ class Timestamp(_Timestamp): "timezones." ) - if getattr(ts_input, 'fold', None) is not None and fold is not None: + if hasattr(ts_input, 'fold') and fold is not None: ts_input = ts_input.replace(fold=fold) # GH 30543 if pd.Timestamp already passed, return it From e58fe0c05aba88ce530b55924ab3c6aa0a1f3387 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 11:51:08 +0300 Subject: [PATCH 107/128] ENH: add fold support --- pandas/_libs/tslibs/conversion.pyx | 16 +++--- pandas/_libs/tslibs/tzconversion.pxd | 1 + pandas/_libs/tslibs/tzconversion.pyx | 84 ++++++++++++++++++++++++---- 3 files changed, 83 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 60bb34d7a3795..bafe411ed58a9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,7 +39,8 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.tzconversion import ( tz_localize_to_utc, tz_convert_single) -from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc +from pandas._libs.tslibs.tzconversion cimport ( + _tz_convert_tzlocal_utc, _tz_convert_tzlocal_fromutc) # ---------------------------------------------------------------------- # Constants @@ -383,7 +384,6 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt - bint fold = 0 value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -395,21 +395,23 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Infer fold from offset-adjusted obj.value # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute - if is_utc(tz) or is_tzlocal(tz): + if is_utc(tz): + pass + elif is_tzlocal(tz): # TODO: think on how we can infer fold for local Timezone # and adjust value for fold - pass + _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) else: trans, deltas, typ = get_dst_info(tz) if typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - fold = _infer_tsobject_fold(obj, trans, deltas, pos) + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo, fold=fold) + obj.dts.us, obj.tzinfo, fold=obj.fold) obj = convert_datetime_to_tsobject( dt, tz, nanos=obj.dts.ps // 1000) return obj @@ -560,7 +562,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) + local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) dt64_to_dtstruct(local_val, &obj.dts) # TODO: think on how we can infer fold for local Timezone # and adjust value for fold diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 9c86057b0a392..c1dd88e5b2313 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -4,4 +4,5 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index b368f0fde3edc..9d198ad1414b0 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -444,12 +444,12 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): return converted -cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): +cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, + bint to_utc, int64_t *delta, + bint *fold=NULL): """ - Convert the i8 representation of a datetime from a tzlocal timezone to - UTC, or vice-versa. - - Private, not intended for use outside of tslibs.conversion + Calculate offset in nanoseconds needed to convert the i8 representation of + a datetime from a tzlocal timezone to UTC, or vice-versa. Parameters ---------- @@ -457,14 +457,22 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): tz : tzinfo to_utc : bint True if converting tzlocal _to_ UTC, False if going the other direction + delta : int64_t* + pointer to delta: offset in nanoseconds needed to adjust val from/to UTC + fold : bint*, default NULL + pointer to fold: whether datetime ends up in a fold or not + after adjustment Returns ------- - result : int64_t + None + + Notes + ----- + Sets delta by pointer, sets fold by pointer. """ cdef: npy_datetimestruct dts - int64_t delta datetime dt dt64_to_dtstruct(val, &dts) @@ -475,11 +483,65 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): if not to_utc: dt = dt.replace(tzinfo=tzutc()) dt = dt.astimezone(tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - if not to_utc: - return val + delta - return val - delta + if to_utc: + delta[0] = -int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + else: + delta[0] = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + + if fold is not NULL: + fold[0] = dt.fold + + +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): + """ + Convert the i8 representation of a datetime from a tzlocal timezone to + UTC, or vice-versa. + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting tzlocal _to_ UTC, False if going the other direction + + Returns + ------- + result : int64_t + """ + cdef int64_t delta + + _tzlocal_get_offset_components(val, tz, to_utc, &delta, NULL) + + return val + delta + + +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): + """ + Convert the i8 representation of a datetime from UTC to local timezone, + set fold by pointer + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + fold : bint* + pointer to fold: whether datetime ends up in a fold or not + after adjustment + + Returns + ------- + result : int64_t + """ + cdef int64_t delta + + _tzlocal_get_offset_components(val, tz, False, &delta, fold) + + return val + delta @cython.boundscheck(False) From d166a673ef8bfc656a2b0bde0b7917479e038fca Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 11:55:41 +0300 Subject: [PATCH 108/128] REFACT: initalize _TSObject.fold to False explicitly --- pandas/_libs/tslibs/conversion.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index bafe411ed58a9..26a0a1cda9ccc 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -219,6 +219,9 @@ cdef class _TSObject: # object tzinfo # bint fold + def __cinit__(self): + self.fold = 0 + @property def value(self): # This is needed in order for `value` to be accessible in lib.pyx From 5729eb894b085fd79c9a1c46da1e064135b9e4fe Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 12:17:17 +0300 Subject: [PATCH 109/128] CLN: fix linting --- pandas/_libs/tslibs/tzconversion.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 9d198ad1414b0..ab90c3b49d609 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -445,8 +445,8 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, - bint to_utc, int64_t *delta, - bint *fold=NULL): + bint to_utc, int64_t *delta, + bint *fold=NULL): """ Calculate offset in nanoseconds needed to convert the i8 representation of a datetime from a tzlocal timezone to UTC, or vice-versa. From c9863e1bd278751769070064c908167aaf5e24de Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 12:22:11 +0300 Subject: [PATCH 110/128] CLN: remove unnecessary TODOs --- pandas/_libs/tslibs/conversion.pyx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 26a0a1cda9ccc..98054e098c0ed 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -401,8 +401,6 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, if is_utc(tz): pass elif is_tzlocal(tz): - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) else: trans, deltas, typ = get_dst_info(tz) @@ -567,8 +565,6 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif is_tzlocal(tz): local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) dt64_to_dtstruct(local_val, &obj.dts) - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) From 1d72e2d0b5f40a5094526e3a2b035cfc2bb88a23 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 12:33:10 +0300 Subject: [PATCH 111/128] add comment to __cinit__ --- pandas/_libs/tslibs/conversion.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 98054e098c0ed..41f4859e9901a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -220,6 +220,7 @@ cdef class _TSObject: # bint fold def __cinit__(self): + # GH 25057. As per PEP 495, set fold to 0 by default self.fold = 0 @property From 97883cecd1baa9128ab72f9b26fdac4c5526df4d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 13:47:26 +0300 Subject: [PATCH 112/128] try reverting the changes to fix test error --- pandas/_libs/tslibs/conversion.pyx | 22 ++++---- pandas/_libs/tslibs/tzconversion.pxd | 1 - pandas/_libs/tslibs/tzconversion.pyx | 84 ++++------------------------ 3 files changed, 21 insertions(+), 86 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 41f4859e9901a..60bb34d7a3795 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,8 +39,7 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.tzconversion import ( tz_localize_to_utc, tz_convert_single) -from pandas._libs.tslibs.tzconversion cimport ( - _tz_convert_tzlocal_utc, _tz_convert_tzlocal_fromutc) +from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc # ---------------------------------------------------------------------- # Constants @@ -219,10 +218,6 @@ cdef class _TSObject: # object tzinfo # bint fold - def __cinit__(self): - # GH 25057. As per PEP 495, set fold to 0 by default - self.fold = 0 - @property def value(self): # This is needed in order for `value` to be accessible in lib.pyx @@ -388,6 +383,7 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt + bint fold = 0 value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -399,21 +395,21 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Infer fold from offset-adjusted obj.value # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute - if is_utc(tz): + if is_utc(tz) or is_tzlocal(tz): + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold pass - elif is_tzlocal(tz): - _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) else: trans, deltas, typ = get_dst_info(tz) if typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) + fold = _infer_tsobject_fold(obj, trans, deltas, pos) # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo, fold=obj.fold) + obj.dts.us, obj.tzinfo, fold=fold) obj = convert_datetime_to_tsobject( dt, tz, nanos=obj.dts.ps // 1000) return obj @@ -564,8 +560,10 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) + local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) dt64_to_dtstruct(local_val, &obj.dts) + # TODO: think on how we can infer fold for local Timezone + # and adjust value for fold else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index c1dd88e5b2313..9c86057b0a392 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -4,5 +4,4 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) -cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index ab90c3b49d609..b368f0fde3edc 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -444,12 +444,12 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): return converted -cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, - bint to_utc, int64_t *delta, - bint *fold=NULL): +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): """ - Calculate offset in nanoseconds needed to convert the i8 representation of - a datetime from a tzlocal timezone to UTC, or vice-versa. + Convert the i8 representation of a datetime from a tzlocal timezone to + UTC, or vice-versa. + + Private, not intended for use outside of tslibs.conversion Parameters ---------- @@ -457,22 +457,14 @@ cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, tz : tzinfo to_utc : bint True if converting tzlocal _to_ UTC, False if going the other direction - delta : int64_t* - pointer to delta: offset in nanoseconds needed to adjust val from/to UTC - fold : bint*, default NULL - pointer to fold: whether datetime ends up in a fold or not - after adjustment Returns ------- - None - - Notes - ----- - Sets delta by pointer, sets fold by pointer. + result : int64_t """ cdef: npy_datetimestruct dts + int64_t delta datetime dt dt64_to_dtstruct(val, &dts) @@ -483,65 +475,11 @@ cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, if not to_utc: dt = dt.replace(tzinfo=tzutc()) dt = dt.astimezone(tz) + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - if to_utc: - delta[0] = -int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - else: - delta[0] = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - - if fold is not NULL: - fold[0] = dt.fold - - -cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): - """ - Convert the i8 representation of a datetime from a tzlocal timezone to - UTC, or vice-versa. - - Private, not intended for use outside of tslibs.conversion - - Parameters - ---------- - val : int64_t - tz : tzinfo - to_utc : bint - True if converting tzlocal _to_ UTC, False if going the other direction - - Returns - ------- - result : int64_t - """ - cdef int64_t delta - - _tzlocal_get_offset_components(val, tz, to_utc, &delta, NULL) - - return val + delta - - -cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): - """ - Convert the i8 representation of a datetime from UTC to local timezone, - set fold by pointer - - Private, not intended for use outside of tslibs.conversion - - Parameters - ---------- - val : int64_t - tz : tzinfo - fold : bint* - pointer to fold: whether datetime ends up in a fold or not - after adjustment - - Returns - ------- - result : int64_t - """ - cdef int64_t delta - - _tzlocal_get_offset_components(val, tz, False, &delta, fold) - - return val + delta + if not to_utc: + return val + delta + return val - delta @cython.boundscheck(False) From aa5232b3e503d71cfa736d3ccffa015d4bae244a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 13:59:22 +0300 Subject: [PATCH 113/128] Revert "try reverting the changes to fix test error" --- pandas/_libs/tslibs/conversion.pyx | 22 ++++---- pandas/_libs/tslibs/tzconversion.pxd | 1 + pandas/_libs/tslibs/tzconversion.pyx | 84 ++++++++++++++++++++++++---- 3 files changed, 86 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 60bb34d7a3795..41f4859e9901a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -39,7 +39,8 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.tzconversion import ( tz_localize_to_utc, tz_convert_single) -from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc +from pandas._libs.tslibs.tzconversion cimport ( + _tz_convert_tzlocal_utc, _tz_convert_tzlocal_fromutc) # ---------------------------------------------------------------------- # Constants @@ -218,6 +219,10 @@ cdef class _TSObject: # object tzinfo # bint fold + def __cinit__(self): + # GH 25057. As per PEP 495, set fold to 0 by default + self.fold = 0 + @property def value(self): # This is needed in order for `value` to be accessible in lib.pyx @@ -383,7 +388,6 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt - bint fold = 0 value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -395,21 +399,21 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, # Infer fold from offset-adjusted obj.value # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute - if is_utc(tz) or is_tzlocal(tz): - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold + if is_utc(tz): pass + elif is_tzlocal(tz): + _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) else: trans, deltas, typ = get_dst_info(tz) if typ == 'dateutil': pos = trans.searchsorted(obj.value, side='right') - 1 - fold = _infer_tsobject_fold(obj, trans, deltas, pos) + obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos) # Keep the converter same as PyDateTime's dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, - obj.dts.us, obj.tzinfo, fold=fold) + obj.dts.us, obj.tzinfo, fold=obj.fold) obj = convert_datetime_to_tsobject( dt, tz, nanos=obj.dts.ps // 1000) return obj @@ -560,10 +564,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): elif obj.value == NPY_NAT: pass elif is_tzlocal(tz): - local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) + local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold) dt64_to_dtstruct(local_val, &obj.dts) - # TODO: think on how we can infer fold for local Timezone - # and adjust value for fold else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 9c86057b0a392..c1dd88e5b2313 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -4,4 +4,5 @@ from numpy cimport int64_t cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index b368f0fde3edc..ab90c3b49d609 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -444,12 +444,12 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): return converted -cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): +cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, + bint to_utc, int64_t *delta, + bint *fold=NULL): """ - Convert the i8 representation of a datetime from a tzlocal timezone to - UTC, or vice-versa. - - Private, not intended for use outside of tslibs.conversion + Calculate offset in nanoseconds needed to convert the i8 representation of + a datetime from a tzlocal timezone to UTC, or vice-versa. Parameters ---------- @@ -457,14 +457,22 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): tz : tzinfo to_utc : bint True if converting tzlocal _to_ UTC, False if going the other direction + delta : int64_t* + pointer to delta: offset in nanoseconds needed to adjust val from/to UTC + fold : bint*, default NULL + pointer to fold: whether datetime ends up in a fold or not + after adjustment Returns ------- - result : int64_t + None + + Notes + ----- + Sets delta by pointer, sets fold by pointer. """ cdef: npy_datetimestruct dts - int64_t delta datetime dt dt64_to_dtstruct(val, &dts) @@ -475,11 +483,65 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): if not to_utc: dt = dt.replace(tzinfo=tzutc()) dt = dt.astimezone(tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - if not to_utc: - return val + delta - return val - delta + if to_utc: + delta[0] = -int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + else: + delta[0] = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + + if fold is not NULL: + fold[0] = dt.fold + + +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): + """ + Convert the i8 representation of a datetime from a tzlocal timezone to + UTC, or vice-versa. + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting tzlocal _to_ UTC, False if going the other direction + + Returns + ------- + result : int64_t + """ + cdef int64_t delta + + _tzlocal_get_offset_components(val, tz, to_utc, &delta, NULL) + + return val + delta + + +cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): + """ + Convert the i8 representation of a datetime from UTC to local timezone, + set fold by pointer + + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + fold : bint* + pointer to fold: whether datetime ends up in a fold or not + after adjustment + + Returns + ------- + result : int64_t + """ + cdef int64_t delta + + _tzlocal_get_offset_components(val, tz, False, &delta, fold) + + return val + delta @cython.boundscheck(False) From 0ebbe02fdc5828b04a87be9c10f3afaba1af7a50 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 14:40:09 +0300 Subject: [PATCH 114/128] REFACT: no longer set delta by pointer --- pandas/_libs/tslibs/tzconversion.pyx | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index ab90c3b49d609..5ff602a811188 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -444,9 +444,8 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): return converted -cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, - bint to_utc, int64_t *delta, - bint *fold=NULL): +cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, + bint to_utc, bint *fold=NULL): """ Calculate offset in nanoseconds needed to convert the i8 representation of a datetime from a tzlocal timezone to UTC, or vice-versa. @@ -457,23 +456,22 @@ cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, tz : tzinfo to_utc : bint True if converting tzlocal _to_ UTC, False if going the other direction - delta : int64_t* - pointer to delta: offset in nanoseconds needed to adjust val from/to UTC fold : bint*, default NULL pointer to fold: whether datetime ends up in a fold or not after adjustment Returns ------- - None + delta : int64_t Notes ----- - Sets delta by pointer, sets fold by pointer. + Sets fold by pointer. """ cdef: npy_datetimestruct dts datetime dt + int64_t delta dt64_to_dtstruct(val, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, @@ -484,14 +482,14 @@ cdef inline void _tzlocal_get_offset_components(int64_t val, tzinfo tz, dt = dt.replace(tzinfo=tzutc()) dt = dt.astimezone(tz) - if to_utc: - delta[0] = -int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - else: - delta[0] = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - if fold is not NULL: fold[0] = dt.fold + if to_utc: + return -int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + else: + return int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): """ @@ -513,7 +511,7 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): """ cdef int64_t delta - _tzlocal_get_offset_components(val, tz, to_utc, &delta, NULL) + delta = _tzlocal_get_offset_components(val, tz, to_utc, NULL) return val + delta @@ -539,7 +537,7 @@ cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): """ cdef int64_t delta - _tzlocal_get_offset_components(val, tz, False, &delta, fold) + delta = _tzlocal_get_offset_components(val, tz, False, fold) return val + delta From c840fd686e0377a81ce56a14a7dab3f470c20f18 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 15:00:52 +0300 Subject: [PATCH 115/128] fix linting --- pandas/_libs/tslibs/tzconversion.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 5ff602a811188..e73e223bb8039 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -445,7 +445,8 @@ cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, - bint to_utc, bint *fold=NULL): + bint to_utc, + bint *fold=NULL): """ Calculate offset in nanoseconds needed to convert the i8 representation of a datetime from a tzlocal timezone to UTC, or vice-versa. From a793bb5310d0762a38b1d51cf4711a1733bdc640 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 17 Feb 2020 16:31:32 +0300 Subject: [PATCH 116/128] DOC: tweak docstrings in tzconversion --- pandas/_libs/tslibs/tzconversion.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index e73e223bb8039..5fdba69ec0d30 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -467,7 +467,7 @@ cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, Notes ----- - Sets fold by pointer. + Sets fold by pointer """ cdef: npy_datetimestruct dts @@ -535,6 +535,10 @@ cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): Returns ------- result : int64_t + + Notes + ----- + Sets fold by pointer """ cdef int64_t delta From d7321397032f3a964e048809a7e449ba04ae7325 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 10:08:04 +0300 Subject: [PATCH 117/128] statically type trans and deltas --- pandas/_libs/tslibs/conversion.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 9093eca2faade..630cfcc6298e9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -388,6 +388,8 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, _TSObject obj = _TSObject() int64_t value # numpy dt64 datetime dt + ndarray[int64_t] trans + int64_t[:] deltas value = dtstruct_to_dt64(&dts) obj.dts = dts @@ -594,8 +596,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): obj.tzinfo = tz -cdef inline bint _infer_tsobject_fold(_TSObject obj, object trans, - object deltas, int32_t pos): +cdef inline bint _infer_tsobject_fold(_TSObject obj, ndarray[int64_t] trans, + int64_t[:] deltas, int32_t pos): """ Infer _TSObject fold property from value by assuming 0 and then setting to 1 if necessary. @@ -603,10 +605,10 @@ cdef inline bint _infer_tsobject_fold(_TSObject obj, object trans, Parameters ---------- obj : _TSObject - trans : object - List of offset transition points in nanoseconds since epoch. - deltas : object - List of offsets corresponding to transition points in trans. + trans : ndarray[int64_t] + ndarray of offset transition points in nanoseconds since epoch. + deltas : int64_t[:] + array of offsets corresponding to transition points in trans. pos : int32_t Position of the last transition point before taking fold into account. From 752acbc394c2f5597f26db16660b77671d8e130c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 10:18:27 +0300 Subject: [PATCH 118/128] REFACT: use PyDateTime_Check when deciding if to raise --- pandas/_libs/tslibs/timestamps.pyx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e7a760c215116..80c7aff93c94d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -6,7 +6,7 @@ from numpy cimport int64_t cnp.import_array() from datetime import time as datetime_time, timedelta -from cpython.datetime cimport (datetime, +from cpython.datetime cimport (datetime, PyDateTime_Check, PyTZInfo_Check, PyDateTime_IMPORT) PyDateTime_IMPORT @@ -405,10 +405,8 @@ class Timestamp(_Timestamp): "Valid values for the fold argument are None, 0, or 1." ) - if (ts_input is not _no_input and ( - is_integer_object(ts_input) or is_float_object(ts_input) or - is_datetime64_object(ts_input) or isinstance(ts_input, str) - or getattr(ts_input, 'tzinfo', None) is not None)): + if (ts_input is not _no_input and not (PyDateTime_Check(ts_input) and + getattr(ts_input, 'tzinfo', None) is None)): raise ValueError( "Cannot pass fold with possibly unambiguous input: int, " "float, numpy.datetime64, str, or timezone-aware " From a1f69cf1d68dd7434dede2d9800a6e59d1c4d308 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 11:57:52 +0300 Subject: [PATCH 119/128] DOC: add refs to docs, add blank lines to docstrings --- doc/source/user_guide/timeseries.rst | 14 ++++++++++---- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/_libs/tslib.pyx | 1 + pandas/_libs/tslibs/timestamps.pyx | 1 + 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 54e60834fe4b7..3abfe8d71a421 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2228,10 +2228,16 @@ you can use the ``tz_convert`` method. Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. - Fold is supported only for constructing from naive ``datetime.datetime`` or :class:`Timestamp` - or for constructing from components (see below) with ``dateutil`` timezones as ``pytz`` - timezones do not support fold. When localizing an ambiguous datetime, we - recommend to rely on :meth:`Timestamp.tz_localize` instead as it gives more control. + Fold is supported only for constructing from naive ``datetime.datetime`` + (see `here `_ for details) or :class:`Timestamp` + or for constructing from components (see below) with ``dateutil`` timezones + (see `here `_ + for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` + timezones do not support fold (see `here `_ + for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime + with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend to rely + on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct + control over how they are handled. .. ipython:: python diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5067b8d8f40e6..872bdcbfcdeb4 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -41,7 +41,7 @@ For example: Fold argument support in Timestamp constructor ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:class:`Timestamp:` now supports the keyword-only fold argument according to PEP 495 similar to parent `pydatetime` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to `dateutil` timezones as `pytz` doesn't support fold. +:class:`Timestamp:` now supports the keyword-only fold argument according to `PEP 495 `_ similar to parent ``datetime.datetime`` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to ``dateutil`` timezones as ``pytz`` doesn't support fold. For example: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a5713be186745..a176c4e41e834 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -89,6 +89,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time + .. versionadded:: 1.1.0 box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' If datetime, convert to datetime.datetime diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 80c7aff93c94d..de4f26dfb2cbd 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -200,6 +200,7 @@ class Timestamp(_Timestamp): when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time + .. versionadded:: 1.1.0 Notes From 97dc342dbaa5afdd8a7a9bfb3adc8bc08a2db3ef Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 12:07:25 +0300 Subject: [PATCH 120/128] REFACT: merge replace with error raising in timestamp.pyx --- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index de4f26dfb2cbd..09adb4eabe783 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -421,8 +421,8 @@ class Timestamp(_Timestamp): "timezones." ) - if hasattr(ts_input, 'fold') and fold is not None: - ts_input = ts_input.replace(fold=fold) + if hasattr(ts_input, 'fold'): + ts_input = ts_input.replace(fold=fold) # GH 30543 if pd.Timestamp already passed, return it # check that only ts_input is passed From 7ac14df718707dc6884f1e34e7b2e72afa293d5f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 12:10:54 +0300 Subject: [PATCH 121/128] REFACT: move +/- delta logic out of _tzlocal_get_offset_components --- pandas/_libs/tslibs/tzconversion.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 5fdba69ec0d30..a9702f91107ec 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -486,10 +486,7 @@ cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, if fold is not NULL: fold[0] = dt.fold - if to_utc: - return -int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - else: - return int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + return int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): @@ -514,7 +511,10 @@ cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): delta = _tzlocal_get_offset_components(val, tz, to_utc, NULL) - return val + delta + if to_utc: + return val - delta + else: + return val + delta cdef int64_t _tz_convert_tzlocal_fromutc(int64_t val, tzinfo tz, bint *fold): From 397b2c860409efe7f1fcb1d0e52fa34df5cf6857 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 12:39:56 +0300 Subject: [PATCH 122/128] CLN: fix indentation --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 09adb4eabe783..a5f933e3ee7a7 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -406,7 +406,8 @@ class Timestamp(_Timestamp): "Valid values for the fold argument are None, 0, or 1." ) - if (ts_input is not _no_input and not (PyDateTime_Check(ts_input) and + if (ts_input is not _no_input and not ( + PyDateTime_Check(ts_input) and getattr(ts_input, 'tzinfo', None) is None)): raise ValueError( "Cannot pass fold with possibly unambiguous input: int, " From 757bd41a229bd3d10f148427724a8af586907c84 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 12:44:11 +0300 Subject: [PATCH 123/128] DOC: anonimize references --- doc/source/user_guide/timeseries.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 3abfe8d71a421..78f765c5678bf 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2229,11 +2229,11 @@ you can use the ``tz_convert`` method. from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. Fold is supported only for constructing from naive ``datetime.datetime`` - (see `here `_ for details) or :class:`Timestamp` + (see `here `__ for details) or :class:`Timestamp` or for constructing from components (see below) with ``dateutil`` timezones - (see `here `_ + (see `here `__ for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` - timezones do not support fold (see `here `_ + timezones do not support fold (see `here `__ for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend to rely on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct From 46a279bd1563ed984582b929d86aef27ea45ccf3 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 19 Feb 2020 16:22:12 +0300 Subject: [PATCH 124/128] DOC: give descriptive names to refs --- doc/source/user_guide/timeseries.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 78f765c5678bf..618ce9de8677a 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2229,11 +2229,11 @@ you can use the ``tz_convert`` method. from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. Fold is supported only for constructing from naive ``datetime.datetime`` - (see `here `__ for details) or :class:`Timestamp` + (see `datetime documentation `__ for details) or :class:`Timestamp` or for constructing from components (see below) with ``dateutil`` timezones - (see `here `__ + (see `dateutil documentation `__ for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` - timezones do not support fold (see `here `__ + timezones do not support fold (see `pytz documentation `__ for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend to rely on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct From 81560bba745d15db2818bb1603c68ef440f8cafb Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 25 Feb 2020 09:32:30 +0300 Subject: [PATCH 125/128] DOC: make fold a subsection in timeseries.rst --- doc/source/user_guide/timeseries.rst | 55 +++++++++++++++------------- doc/source/whatsnew/v1.1.0.rst | 2 +- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 618ce9de8677a..0cc443a2d9d23 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2220,32 +2220,6 @@ you can use the ``tz_convert`` method. rng_pytz.tz_convert('US/Eastern') -.. note:: - - .. versionadded:: 1.1.0 - - For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument. - Due to daylight saving time, one wall clock time can occur twice when shifting - from summer to winter time; fold describes whether the datetime-like corresponds - to the first (0) or the second time (1) the wall clock hits the ambiguous time. - Fold is supported only for constructing from naive ``datetime.datetime`` - (see `datetime documentation `__ for details) or :class:`Timestamp` - or for constructing from components (see below) with ``dateutil`` timezones - (see `dateutil documentation `__ - for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` - timezones do not support fold (see `pytz documentation `__ - for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime - with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend to rely - on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct - control over how they are handled. - - .. ipython:: python - - pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), - tz='dateutil/Europe/London', fold=0) - pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, - tz='dateutil/Europe/London', fold=1) - .. note:: When using ``pytz`` time zones, :class:`DatetimeIndex` will construct a different @@ -2323,6 +2297,35 @@ To remove time zone information, use ``tz_localize(None)`` or ``tz_convert(None) # tz_convert(None) is identical to tz_convert('UTC').tz_localize(None) didx.tz_convert('UTC').tz_localize(None) +.. _timeseries.fold: + +Fold +~~~~ + +.. versionadded:: 1.1.0 + +For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument. +Due to daylight saving time, one wall clock time can occur twice when shifting +from summer to winter time; fold describes whether the datetime-like corresponds +to the first (0) or the second time (1) the wall clock hits the ambiguous time. +Fold is supported only for constructing from naive ``datetime.datetime`` +(see `datetime documentation `__ for details) or :class:`Timestamp` +or for constructing from components (see below) with ``dateutil`` timezones +(see `dateutil documentation `__ +for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` +timezones do not support fold (see `pytz documentation `__ +for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime +with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend to rely +on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct +control over how they are handled. + +.. ipython:: python + + pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0), + tz='dateutil/Europe/London', fold=0) + pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30, + tz='dateutil/Europe/London', fold=1) + .. _timeseries.timezone_ambiguous: Ambiguous times when localizing diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 872bdcbfcdeb4..cccf95865bbd7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -56,7 +56,7 @@ For example: tz="dateutil/Europe/London", fold=1) ts -For more, see :ref:`Timezone section ` in the user guide on working with timezones. +For more on working with fold, see :ref:`Fold subsection ` in the user guide. .. _whatsnew_110.enhancements.other: From 42566426977bbcfb697f6e59c19d471d72fa2d16 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 25 Feb 2020 09:52:05 +0300 Subject: [PATCH 126/128] TST: add test for invalid fold raise --- pandas/tests/indexes/datetimes/test_constructors.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index ec84dc3d5e565..b293c008d6683 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -961,6 +961,14 @@ def test_pass_datetimeindex_to_index(self): tm.assert_numpy_array_equal(idx.values, expected.values) +def test_timestamp_constructor_invalid_fold_raise(): + # Test for #25057 + # Valid fold values are only [None, 0, 1] + msg = "Valid values for the fold argument are None, 0, or 1." + with pytest.raises(ValueError, match=msg): + Timestamp(123, fold=2) + + def test_timestamp_constructor_pytz_fold_raise(): # Test for #25057 # pytz doesn't support fold. Check that we raise From 0168aa6680844589af9a39a20066e4df81ebda64 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 25 Feb 2020 11:24:30 +0300 Subject: [PATCH 127/128] DOC: rephrase text in timeseries.rst to improve readability --- doc/source/user_guide/timeseries.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 0cc443a2d9d23..f208c8d576131 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -2309,8 +2309,8 @@ Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. Fold is supported only for constructing from naive ``datetime.datetime`` -(see `datetime documentation `__ for details) or :class:`Timestamp` -or for constructing from components (see below) with ``dateutil`` timezones +(see `datetime documentation `__ for details) or from :class:`Timestamp` +or for constructing from components (see below). Only ``dateutil`` timezones are supported (see `dateutil documentation `__ for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz`` timezones do not support fold (see `pytz documentation `__ From cd02318aca686126c6c96c1ab3cefda4fbe4b128 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 26 Feb 2020 09:22:28 +0300 Subject: [PATCH 128/128] add fold support to merged code --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ce0524d3c9814..5cd3467eed042 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -454,7 +454,8 @@ class Timestamp(_Timestamp): "hour": hour or 0, "minute": minute or 0, "second": second or 0, - "microsecond": microsecond or 0 + "microsecond": microsecond or 0, + "fold": fold or 0 } if year is not None: datetime_kwargs["year"] = year