From 6a74884af39fdf05a6847ab4175ff9860f92b380 Mon Sep 17 00:00:00 2001 From: Stefan Scherfke Date: Mon, 28 Aug 2017 10:11:22 +0200 Subject: [PATCH 1/2] BUG: in Timestamp.replace when replacing tzinfo around DST changes closes #15683 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 20 ++++++++++---------- pandas/tests/tseries/test_timezones.py | 21 +++++++++++++++++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5a353544a4283..8ed3a26a0ee8f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -487,6 +487,7 @@ Conversion - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) - Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) +- Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ec12611ae7f02..38c07d0c25233 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -683,14 +683,16 @@ class Timestamp(_Timestamp): cdef: pandas_datetimestruct dts - int64_t value - object _tzinfo, result, k, v + int64_t value, value_tz, offset + object _tzinfo, result, k, v, ts_input # set to naive if needed _tzinfo = self.tzinfo value = self.value if _tzinfo is not None: - value = tz_convert_single(value, 'UTC', _tzinfo) + value_tz = tz_convert_single(value, _tzinfo, 'UTC') + offset = value - value_tz + value += offset # setup components pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) @@ -724,16 +726,14 @@ class Timestamp(_Timestamp): _tzinfo = tzinfo # reconstruct & check bounds - value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) + ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, + dts.sec, dts.us, tzinfo=_tzinfo) + ts = convert_to_tsobject(ts_input, _tzinfo, None, 0, 0) + value = ts.value + (dts.ps // 1000) if value != NPY_NAT: _check_dts_bounds(&dts) - # set tz if needed - if _tzinfo is not None: - value = tz_convert_single(value, _tzinfo, 'UTC') - - result = create_timestamp_from_ts(value, dts, _tzinfo, self.freq) - return result + return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) def isoformat(self, sep='T'): base = super(_Timestamp, self).isoformat(sep=sep) diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index a9ecfd797a32b..ac1a338d2844d 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -1269,6 +1269,27 @@ def test_ambiguous_compat(self): assert (result_pytz.to_pydatetime().tzname() == result_dateutil.to_pydatetime().tzname()) + def test_replace_tzinfo(self): + # GH 15683 + dt = datetime(2016, 3, 27, 1) + tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo + + result_dt = dt.replace(tzinfo=tzinfo) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo) + + if hasattr(result_dt, 'timestamp'): # New method in Py 3.3 + assert result_dt.timestamp() == result_pd.timestamp() + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None) + + if hasattr(result_dt, 'timestamp'): # New method in Py 3.3 + assert result_dt.timestamp() == result_pd.timestamp() + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + def test_index_equals_with_tz(self): left = date_range('1/1/2011', periods=100, freq='H', tz='utc') right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern') From 66d94a82ff465abb457f4a18f3029c6697a31a63 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 12 Sep 2017 21:54:33 -0400 Subject: [PATCH 2/2] asv & some typing --- asv_bench/benchmarks/timestamp.py | 23 ++++++++++++++++++++ pandas/_libs/tslib.pyx | 36 ++++++++++++++++++------------- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index 066479b22739a..e4f3023037580 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -1,5 +1,7 @@ from .pandas_vb_common import * from pandas import to_timedelta, Timestamp +import pytz +import datetime class TimestampProperties(object): @@ -58,3 +60,24 @@ def time_is_leap_year(self): def time_microsecond(self): self.ts.microsecond + + +class TimestampOps(object): + goal_time = 0.2 + + def setup(self): + self.ts = Timestamp('2017-08-25 08:16:14') + self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern') + + dt = datetime.datetime(2016, 3, 27, 1) + self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo + self.ts2 = Timestamp(dt) + + def time_replace_tz(self): + self.ts.replace(tzinfo=pytz.timezone('US/Eastern')) + + def time_replace_across_dst(self): + self.ts2.replace(tzinfo=self.tzinfo) + + def time_replace_None(self): + self.ts_tz.replace(tzinfo=None) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 38c07d0c25233..8238552b44e03 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -142,6 +142,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): cdef: Py_ssize_t i, n = len(arr) + ndarray[int64_t] trans, deltas pandas_datetimestruct dts object dt int64_t value @@ -417,8 +418,9 @@ class Timestamp(_Timestamp): def _round(self, freq, rounder): - cdef int64_t unit - cdef object result, value + cdef: + int64_t unit, r, value, buff = 1000000 + object result from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos @@ -429,16 +431,15 @@ class Timestamp(_Timestamp): if unit < 1000 and unit % 1000 != 0: # for nano rounding, work with the last 6 digits separately # due to float precision - buff = 1000000 - result = (buff * (value // buff) + unit * - (rounder((value % buff) / float(unit))).astype('i8')) + r = (buff * (value // buff) + unit * + (rounder((value % buff) / float(unit))).astype('i8')) elif unit >= 1000 and unit % 1000 != 0: msg = 'Precision will be lost using frequency: {}' warnings.warn(msg.format(freq)) - result = (unit * rounder(value / float(unit)).astype('i8')) + r = (unit * rounder(value / float(unit)).astype('i8')) else: - result = (unit * rounder(value / float(unit)).astype('i8')) - result = Timestamp(result, unit='ns') + r = (unit * rounder(value / float(unit)).astype('i8')) + result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) return result @@ -684,15 +685,15 @@ class Timestamp(_Timestamp): cdef: pandas_datetimestruct dts int64_t value, value_tz, offset - object _tzinfo, result, k, v, ts_input + object _tzinfo, result, k, v + datetime ts_input # set to naive if needed _tzinfo = self.tzinfo value = self.value if _tzinfo is not None: value_tz = tz_convert_single(value, _tzinfo, 'UTC') - offset = value - value_tz - value += offset + value += value - value_tz # setup components pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) @@ -1175,7 +1176,7 @@ cdef class _Timestamp(datetime): return np.datetime64(self.value, 'ns') def __add__(self, other): - cdef int64_t other_int + cdef int64_t other_int, nanos if is_timedelta64_object(other): other_int = other.astype('timedelta64[ns]').view('i8') @@ -1625,6 +1626,10 @@ cdef inline void _localize_tso(_TSObject obj, object tz): """ Take a TSObject in UTC and localizes to timezone tz. """ + cdef: + ndarray[int64_t] trans, deltas + Py_ssize_t delta, posn + if is_utc(tz): obj.tzinfo = tz elif is_tzlocal(tz): @@ -1676,7 +1681,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): obj.tzinfo = tz -def _localize_pydatetime(object dt, object tz): +cpdef inline object _localize_pydatetime(object dt, object tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. """ @@ -3892,7 +3897,7 @@ for _maybe_method_name in dir(NaTType): # Conversion routines -def _delta_to_nanoseconds(delta): +cpdef int64_t _delta_to_nanoseconds(delta): if isinstance(delta, np.ndarray): return delta.astype('m8[ns]').astype('int64') if hasattr(delta, 'nanos'): @@ -4137,7 +4142,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return result -def tz_convert_single(int64_t val, object tz1, object tz2): +cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): """ Convert the val (in i8) from timezone1 to timezone2 @@ -5006,6 +5011,7 @@ cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil: def dates_normalized(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) + ndarray[int64_t] trans, deltas pandas_datetimestruct dts if tz is None or is_utc(tz):