Skip to content

BUG: in Timestamp.replace when replacing tzinfo around DST changes #17507

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits merged on Sep 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions asv_bench/benchmarks/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .pandas_vb_common import *
from pandas import to_timedelta, Timestamp
import pytz
import datetime


class TimestampProperties(object):
Expand Down Expand Up @@ -58,3 +60,24 @@ def time_is_leap_year(self):

def time_microsecond(self):
    """Timed body: read the ``microsecond`` attribute of ``self.ts``."""
    # The value is intentionally discarded; only the attribute access
    # itself is being exercised.
    self.ts.microsecond


class TimestampOps(object):
    """Benchmarks for ``Timestamp.replace`` when only ``tzinfo`` changes.

    ``setup`` prepares every input; each ``time_*`` method performs a single
    ``replace`` call and discards the result.
    """
    goal_time = 0.2

    def setup(self):
        """Build the timestamps and tzinfo objects used by the timed methods."""
        self.ts = Timestamp('2017-08-25 08:16:14')
        self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')

        # Resolved here rather than inside time_replace_tz so that the timed
        # body contains only the Timestamp.replace call and not the pytz
        # timezone lookup.
        self.tz_eastern = pytz.timezone('US/Eastern')

        # tzinfo obtained by localizing 2016-03-27 01:00 in CET with
        # is_dst=False; used by time_replace_across_dst.
        dt = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
        self.ts2 = Timestamp(dt)

    def time_replace_tz(self):
        """Attach the US/Eastern tzinfo to a naive timestamp."""
        self.ts.replace(tzinfo=self.tz_eastern)

    def time_replace_across_dst(self):
        """Attach the localized CET tzinfo prepared in ``setup``."""
        self.ts2.replace(tzinfo=self.tzinfo)

    def time_replace_None(self):
        """Strip the tzinfo from a timezone-aware timestamp."""
        self.ts_tz.replace(tzinfo=None)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ Conversion
- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`)
- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`)
- Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`)
- Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`)

Indexing
^^^^^^^^
Expand Down
48 changes: 27 additions & 21 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False):

cdef:
Py_ssize_t i, n = len(arr)
ndarray[int64_t] trans, deltas
pandas_datetimestruct dts
object dt
int64_t value
Expand Down Expand Up @@ -417,8 +418,9 @@ class Timestamp(_Timestamp):

def _round(self, freq, rounder):

cdef int64_t unit
cdef object result, value
cdef:
int64_t unit, r, value, buff = 1000000
object result

from pandas.tseries.frequencies import to_offset
unit = to_offset(freq).nanos
Expand All @@ -429,16 +431,15 @@ class Timestamp(_Timestamp):
if unit < 1000 and unit % 1000 != 0:
# for nano rounding, work with the last 6 digits separately
# due to float precision
buff = 1000000
result = (buff * (value // buff) + unit *
(rounder((value % buff) / float(unit))).astype('i8'))
r = (buff * (value // buff) + unit *
(rounder((value % buff) / float(unit))).astype('i8'))
elif unit >= 1000 and unit % 1000 != 0:
msg = 'Precision will be lost using frequency: {}'
warnings.warn(msg.format(freq))
result = (unit * rounder(value / float(unit)).astype('i8'))
r = (unit * rounder(value / float(unit)).astype('i8'))
else:
result = (unit * rounder(value / float(unit)).astype('i8'))
result = Timestamp(result, unit='ns')
r = (unit * rounder(value / float(unit)).astype('i8'))
result = Timestamp(r, unit='ns')
if self.tz is not None:
result = result.tz_localize(self.tz)
return result
Expand Down Expand Up @@ -683,14 +684,16 @@ class Timestamp(_Timestamp):

cdef:
pandas_datetimestruct dts
int64_t value
int64_t value, value_tz, offset
object _tzinfo, result, k, v
datetime ts_input

# set to naive if needed
_tzinfo = self.tzinfo
value = self.value
if _tzinfo is not None:
value = tz_convert_single(value, 'UTC', _tzinfo)
value_tz = tz_convert_single(value, _tzinfo, 'UTC')
value += value - value_tz

# setup components
pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts)
Expand Down Expand Up @@ -724,16 +727,14 @@ class Timestamp(_Timestamp):
_tzinfo = tzinfo

# reconstruct & check bounds
value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
dts.sec, dts.us, tzinfo=_tzinfo)
ts = convert_to_tsobject(ts_input, _tzinfo, None, 0, 0)
value = ts.value + (dts.ps // 1000)
if value != NPY_NAT:
_check_dts_bounds(&dts)

# set tz if needed
if _tzinfo is not None:
value = tz_convert_single(value, _tzinfo, 'UTC')

result = create_timestamp_from_ts(value, dts, _tzinfo, self.freq)
return result
return create_timestamp_from_ts(value, dts, _tzinfo, self.freq)

def isoformat(self, sep='T'):
base = super(_Timestamp, self).isoformat(sep=sep)
Expand Down Expand Up @@ -1175,7 +1176,7 @@ cdef class _Timestamp(datetime):
return np.datetime64(self.value, 'ns')

def __add__(self, other):
cdef int64_t other_int
cdef int64_t other_int, nanos

if is_timedelta64_object(other):
other_int = other.astype('timedelta64[ns]').view('i8')
Expand Down Expand Up @@ -1625,6 +1626,10 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
"""
Take a TSObject in UTC and localizes to timezone tz.
"""
cdef:
ndarray[int64_t] trans, deltas
Py_ssize_t delta, posn

if is_utc(tz):
obj.tzinfo = tz
elif is_tzlocal(tz):
Expand Down Expand Up @@ -1676,7 +1681,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
obj.tzinfo = tz


def _localize_pydatetime(object dt, object tz):
cpdef inline object _localize_pydatetime(object dt, object tz):
"""
Take a datetime/Timestamp in UTC and localizes to timezone tz.
"""
Expand Down Expand Up @@ -3892,7 +3897,7 @@ for _maybe_method_name in dir(NaTType):
# Conversion routines


def _delta_to_nanoseconds(delta):
cpdef int64_t _delta_to_nanoseconds(delta):
if isinstance(delta, np.ndarray):
return delta.astype('m8[ns]').astype('int64')
if hasattr(delta, 'nanos'):
Expand Down Expand Up @@ -4137,7 +4142,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
return result


def tz_convert_single(int64_t val, object tz1, object tz2):
cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
"""
Convert the val (in i8) from timezone1 to timezone2

Expand Down Expand Up @@ -5006,6 +5011,7 @@ cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil:
def dates_normalized(ndarray[int64_t] stamps, tz=None):
cdef:
Py_ssize_t i, n = len(stamps)
ndarray[int64_t] trans, deltas
pandas_datetimestruct dts

if tz is None or is_utc(tz):
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/tseries/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,27 @@ def test_ambiguous_compat(self):
assert (result_pytz.to_pydatetime().tzname() ==
result_dateutil.to_pydatetime().tzname())

def test_replace_tzinfo(self):
    """``Timestamp.replace(tzinfo=...)`` must agree with ``datetime.replace``.

    Covers both attaching a pytz-localized CET tzinfo to a naive value and
    stripping it again (GH 15683).
    """
    # GH 15683
    naive = datetime(2016, 3, 27, 1)
    cet = pytz.timezone('CET').localize(naive, is_dst=False).tzinfo

    def check(expected, actual):
        # ``expected`` is the stdlib datetime result, ``actual`` the
        # Timestamp result for the same sequence of replace calls.
        if hasattr(expected, 'timestamp'):  # New method in Py 3.3
            assert expected.timestamp() == actual.timestamp()
        assert expected == actual
        assert expected == actual.to_pydatetime()

    # attach the tzinfo
    check(naive.replace(tzinfo=cet),
          Timestamp(naive).replace(tzinfo=cet))

    # attach, then drop it again
    check(naive.replace(tzinfo=cet).replace(tzinfo=None),
          Timestamp(naive).replace(tzinfo=cet).replace(tzinfo=None))

def test_index_equals_with_tz(self):
left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern')
Expand Down