Skip to content

Commit 3f95042

Browse files
committed
BUG: segfault manifesting with dateutil=2.6 w.r.t. replace when timezones are present
closes #14621
1 parent 46000da commit 3f95042

File tree

7 files changed

+188
-27
lines changed

7 files changed

+188
-27
lines changed

ci/requirements-3.5_OSX.pip

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
python-dateutil>=2.5.0
1+
python-dateutil==2.5.3

doc/source/whatsnew/v0.19.2.txt

+3
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ Performance Improvements
2222

2323
Bug Fixes
2424
~~~~~~~~~
25+
26+
- compat with ``dateutil==2.6.0`` for testing (:issue:`14621`)
27+
- allow ``nanosecond`` as a keyword argument in ``Timestamp.replace`` (:issue:`14621`)

pandas/tseries/offsets.py

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def wrapper(self, other):
6868
other = other.tz_localize(None)
6969

7070
result = func(self, other)
71+
7172
if self._adjust_dst:
7273
result = tslib._localize_pydatetime(result, tz)
7374

pandas/tseries/tests/test_offsets.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
from distutils.version import LooseVersion
23
from datetime import date, datetime, timedelta
34
from dateutil.relativedelta import relativedelta
45
from pandas.compat import range, iteritems
@@ -4851,6 +4852,7 @@ def _test_all_offsets(self, n, **kwds):
48514852

48524853
def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset):
48534854
offset = DateOffset(**{offset_name: offset_n})
4855+
48544856
t = tstart + offset
48554857
if expected_utc_offset is not None:
48564858
self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset)
@@ -4890,17 +4892,23 @@ def _make_timestamp(self, string, hrs_offset, tz):
48904892
return Timestamp(string + offset_string).tz_convert(tz)
48914893

48924894
def test_fallback_plural(self):
4893-
"""test moving from daylight savings to standard time"""
4895+
# test moving from daylight savings to standard time
4896+
import dateutil
48944897
for tz, utc_offsets in self.timezone_utc_offsets.items():
48954898
hrs_pre = utc_offsets['utc_offset_daylight']
48964899
hrs_post = utc_offsets['utc_offset_standard']
4897-
self._test_all_offsets(
4898-
n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
4899-
hrs_pre, tz),
4900-
expected_utc_offset=hrs_post)
4900+
4901+
if dateutil.__version__ != LooseVersion('2.6.0'):
4902+
# buggy ambiguous behavior in 2.6.0
4903+
# GH 14621
4904+
# https://github.com/dateutil/dateutil/issues/321
4905+
self._test_all_offsets(
4906+
n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
4907+
hrs_pre, tz),
4908+
expected_utc_offset=hrs_post)
49014909

49024910
def test_springforward_plural(self):
4903-
"""test moving from standard to daylight savings"""
4911+
# test moving from standard to daylight savings
49044912
for tz, utc_offsets in self.timezone_utc_offsets.items():
49054913
hrs_pre = utc_offsets['utc_offset_standard']
49064914
hrs_post = utc_offsets['utc_offset_daylight']

pandas/tseries/tests/test_timezones.py

+86-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import numpy as np
66
import pytz
7-
7+
from distutils.version import LooseVersion
88
from pandas.types.dtypes import DatetimeTZDtype
99
from pandas import (Index, Series, DataFrame, isnull, Timestamp)
1010

@@ -518,8 +518,12 @@ def f():
518518

519519
times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H",
520520
tz=tz, ambiguous='infer')
521-
self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz))
522-
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz))
521+
self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz,
522+
freq="H"))
523+
if dateutil.__version__ != LooseVersion('2.6.0'):
524+
# GH 14621
525+
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz,
526+
freq="H"))
523527

524528
def test_ambiguous_nat(self):
525529
tz = self.tz('US/Eastern')
@@ -1163,6 +1167,85 @@ class TestTimeZones(tm.TestCase):
11631167
def setUp(self):
11641168
tm._skip_if_no_pytz()
11651169

1170+
def test_replace(self):
1171+
# GH 14621
1172+
# GH 7825
1173+
# replacing datetime components with and w/o presence of a timezone
1174+
dt = Timestamp('2016-01-01 09:00:00')
1175+
result = dt.replace(hour=0)
1176+
expected = Timestamp('2016-01-01 00:00:00')
1177+
self.assertEqual(result, expected)
1178+
1179+
for tz in self.timezones:
1180+
dt = Timestamp('2016-01-01 09:00:00', tz=tz)
1181+
result = dt.replace(hour=0)
1182+
expected = Timestamp('2016-01-01 00:00:00', tz=tz)
1183+
self.assertEqual(result, expected)
1184+
1185+
# we preserve nanoseconds
1186+
dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
1187+
result = dt.replace(hour=0)
1188+
expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
1189+
self.assertEqual(result, expected)
1190+
1191+
# test all
1192+
dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
1193+
result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5,
1194+
second=5, microsecond=5, nanosecond=5)
1195+
expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
1196+
self.assertEqual(result, expected)
1197+
1198+
# error
1199+
def f():
1200+
dt.replace(foo=5)
1201+
self.assertRaises(ValueError, f)
1202+
1203+
def f():
1204+
dt.replace(hour=0.1)
1205+
self.assertRaises(ValueError, f)
1206+
1207+
# assert conversion to naive is the same as replacing tzinfo with None
1208+
dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern')
1209+
self.assertEqual(dt.tz_localize(None), dt.replace(tzinfo=None))
1210+
1211+
def test_ambiguous_compat(self):
1212+
# validate that pytz and dateutil are compat for dst
1213+
# when the transition happens
1214+
tm._skip_if_no_dateutil()
1215+
tm._skip_if_no_pytz()
1216+
1217+
pytz_zone = 'Europe/London'
1218+
dateutil_zone = 'dateutil/Europe/London'
1219+
result_pytz = (Timestamp('2013-10-27 01:00:00')
1220+
.tz_localize(pytz_zone, ambiguous=0))
1221+
result_dateutil = (Timestamp('2013-10-27 01:00:00')
1222+
.tz_localize(dateutil_zone, ambiguous=0))
1223+
self.assertEqual(result_pytz.value, result_dateutil.value)
1224+
self.assertEqual(result_pytz.value, 1382835600000000000)
1225+
1226+
# dateutil 2.6 buggy w.r.t. ambiguous=0
1227+
if dateutil.__version__ != LooseVersion('2.6.0'):
1228+
# GH 14621
1229+
# https://github.com/dateutil/dateutil/issues/321
1230+
self.assertEqual(result_pytz.to_pydatetime().tzname(),
1231+
result_dateutil.to_pydatetime().tzname())
1232+
self.assertEqual(str(result_pytz), str(result_dateutil))
1233+
1234+
# 1 hour difference
1235+
result_pytz = (Timestamp('2013-10-27 01:00:00')
1236+
.tz_localize(pytz_zone, ambiguous=1))
1237+
result_dateutil = (Timestamp('2013-10-27 01:00:00')
1238+
.tz_localize(dateutil_zone, ambiguous=1))
1239+
self.assertEqual(result_pytz.value, result_dateutil.value)
1240+
self.assertEqual(result_pytz.value, 1382832000000000000)
1241+
1242+
# dateutil < 2.6 is buggy w.r.t. ambiguous timezones
1243+
if dateutil.__version__ > LooseVersion('2.5.3'):
1244+
# GH 14621
1245+
self.assertEqual(str(result_pytz), str(result_dateutil))
1246+
self.assertEqual(result_pytz.to_pydatetime().tzname(),
1247+
result_dateutil.to_pydatetime().tzname())
1248+
11661249
def test_index_equals_with_tz(self):
11671250
left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
11681251
right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern')

pandas/tseries/tests/test_tslib.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,9 @@ def test_repr(self):
327327

328328
# dateutil zone change (only matters for repr)
329329
import dateutil
330-
if dateutil.__version__ >= LooseVersion(
331-
'2.3') and dateutil.__version__ <= LooseVersion('2.4.0'):
330+
if (dateutil.__version__ >= LooseVersion('2.3') and
331+
(dateutil.__version__ <= LooseVersion('2.4.0') or
332+
dateutil.__version__ >= LooseVersion('2.6.0'))):
332333
timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern',
333334
'dateutil/US/Pacific']
334335
else:

pandas/tslib.pyx

+80-15
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ except NameError: # py3
9898
cdef inline object create_timestamp_from_ts(
9999
int64_t value, pandas_datetimestruct dts,
100100
object tz, object freq):
101+
""" convenience routine to construct a Timestamp from its parts """
101102
cdef _Timestamp ts_base
102103
ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month,
103104
dts.day, dts.hour, dts.min,
@@ -112,6 +113,7 @@ cdef inline object create_timestamp_from_ts(
112113
cdef inline object create_datetime_from_ts(
113114
int64_t value, pandas_datetimestruct dts,
114115
object tz, object freq):
116+
""" convenience routine to construct a datetime.datetime from its parts """
115117
return datetime(dts.year, dts.month, dts.day, dts.hour,
116118
dts.min, dts.sec, dts.us, tz)
117119

@@ -378,7 +380,6 @@ class Timestamp(_Timestamp):
378380
# Mixing pydatetime positional and keyword arguments is forbidden!
379381

380382
cdef _TSObject ts
381-
cdef _Timestamp ts_base
382383

383384
if offset is not None:
384385
# deprecate offset kwd in 0.19.0, GH13593
@@ -412,17 +413,7 @@ class Timestamp(_Timestamp):
412413
from pandas.tseries.frequencies import to_offset
413414
freq = to_offset(freq)
414415

415-
# make datetime happy
416-
ts_base = _Timestamp.__new__(cls, ts.dts.year, ts.dts.month,
417-
ts.dts.day, ts.dts.hour, ts.dts.min,
418-
ts.dts.sec, ts.dts.us, ts.tzinfo)
419-
420-
# fill out rest of data
421-
ts_base.value = ts.value
422-
ts_base.freq = freq
423-
ts_base.nanosecond = ts.dts.ps / 1000
424-
425-
return ts_base
416+
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
426417

427418
def _round(self, freq, rounder):
428419

@@ -660,8 +651,80 @@ class Timestamp(_Timestamp):
660651
astimezone = tz_convert
661652

662653
def replace(self, **kwds):
663-
return Timestamp(datetime.replace(self, **kwds),
664-
freq=self.freq)
654+
"""
655+
implements datetime.replace, handles nanoseconds
656+
657+
Parameters
658+
----------
659+
kwargs: key-value dict
660+
661+
accepted keywords are:
662+
year, month, day, hour, minute, second, microsecond, nanosecond, tzinfo
663+
664+
values must be integer, or for tzinfo, a tz-convertible
665+
666+
Returns
667+
-------
668+
Timestamp with fields replaced
669+
"""
670+
671+
cdef:
672+
pandas_datetimestruct dts
673+
int64_t value
674+
object tzinfo, result, k, v
675+
_TSObject ts
676+
677+
# set to naive if needed
678+
tzinfo = self.tzinfo
679+
value = self.value
680+
if tzinfo is not None:
681+
value = tz_convert_single(value, 'UTC', tzinfo)
682+
683+
# setup components
684+
pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts)
685+
dts.ps = self.nanosecond * 1000
686+
687+
# replace
688+
def validate(k, v):
689+
""" validate integers """
690+
if not isinstance(v, int):
691+
raise ValueError("value must be an integer, received {v} for {k}".format(v=type(v), k=k))
692+
return v
693+
694+
for k, v in kwds.items():
695+
if k == 'year':
696+
dts.year = validate(k, v)
697+
elif k == 'month':
698+
dts.month = validate(k, v)
699+
elif k == 'day':
700+
dts.day = validate(k, v)
701+
elif k == 'hour':
702+
dts.hour = validate(k, v)
703+
elif k == 'minute':
704+
dts.min = validate(k, v)
705+
elif k == 'second':
706+
dts.sec = validate(k, v)
707+
elif k == 'microsecond':
708+
dts.us = validate(k, v)
709+
elif k == 'nanosecond':
710+
dts.ps = validate(k, v) * 1000
711+
elif k == 'tzinfo':
712+
tzinfo = v
713+
else:
714+
raise ValueError("invalid name {} passed".format(k))
715+
716+
# reconstruct & check bounds
717+
value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
718+
if value != NPY_NAT:
719+
_check_dts_bounds(&dts)
720+
721+
# set tz if needed
722+
if tzinfo is not None:
723+
value = tz_convert_single(value, tzinfo, 'UTC')
724+
725+
result = create_timestamp_from_ts(value, dts, tzinfo, self.freq)
726+
727+
return result
665728

666729
def isoformat(self, sep='T'):
667730
base = super(_Timestamp, self).isoformat(sep=sep)
@@ -5041,7 +5104,9 @@ cpdef normalize_date(object dt):
50415104
-------
50425105
normalized : datetime.datetime or Timestamp
50435106
"""
5044-
if PyDateTime_Check(dt):
5107+
if is_timestamp(dt):
5108+
return dt.replace(hour=0, minute=0, second=0, microsecond=0, nanosecond=0)
5109+
elif PyDateTime_Check(dt):
50455110
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
50465111
elif PyDate_Check(dt):
50475112
return datetime(dt.year, dt.month, dt.day)

0 commit comments

Comments
 (0)