Skip to content

Commit cf4eb8c

Browse files
committed
BUG: fix segfault in Timestamp.replace with dateutil 2.6 when timezones are present
closes pandas-dev#14621
1 parent 2e276fb commit cf4eb8c

File tree

4 files changed

+127
-20
lines changed

4 files changed

+127
-20
lines changed

ci/requirements-3.5_OSX.pip

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
python-dateutil>=2.5.0
1+
python-dateutil==2.5.3

pandas/tseries/tests/test_offsets.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4890,7 +4890,7 @@ def _make_timestamp(self, string, hrs_offset, tz):
48904890
return Timestamp(string + offset_string).tz_convert(tz)
48914891

48924892
def test_fallback_plural(self):
4893-
"""test moving from daylight savings to standard time"""
4893+
# test moving from daylight savings to standard time
48944894
for tz, utc_offsets in self.timezone_utc_offsets.items():
48954895
hrs_pre = utc_offsets['utc_offset_daylight']
48964896
hrs_post = utc_offsets['utc_offset_standard']
@@ -4900,7 +4900,7 @@ def test_fallback_plural(self):
49004900
expected_utc_offset=hrs_post)
49014901

49024902
def test_springforward_plural(self):
4903-
"""test moving from standard to daylight savings"""
4903+
# test moving from standard to daylight savings
49044904
for tz, utc_offsets in self.timezone_utc_offsets.items():
49054905
hrs_pre = utc_offsets['utc_offset_standard']
49064906
hrs_post = utc_offsets['utc_offset_daylight']

pandas/tseries/tests/test_timezones.py

+44-2
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,10 @@ def f():
518518

519519
times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H",
520520
tz=tz, ambiguous='infer')
521-
self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz))
522-
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz))
521+
self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz,
522+
freq="H"))
523+
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz,
524+
freq="H"))
523525

524526
def test_ambiguous_nat(self):
525527
tz = self.tz('US/Eastern')
@@ -1163,6 +1165,46 @@ class TestTimeZones(tm.TestCase):
11631165
def setUp(self):
11641166
tm._skip_if_no_pytz()
11651167

1168+
def test_replace(self):
1169+
# GH 14621
1170+
# GH 7825
1171+
# replacing datetime components with and w/o presence of a timezone
1172+
dt = Timestamp('2016-01-01 09:00:00')
1173+
result = dt.replace(hour=0)
1174+
expected = Timestamp('2016-01-01 00:00:00')
1175+
self.assertEqual(result, expected)
1176+
1177+
for tz in self.timezones:
1178+
dt = Timestamp('2016-01-01 09:00:00', tz=tz)
1179+
result = dt.replace(hour=0)
1180+
expected = Timestamp('2016-01-01 00:00:00', tz=tz)
1181+
self.assertEqual(result, expected)
1182+
1183+
# we preserve nanoseconds
1184+
dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
1185+
result = dt.replace(hour=0)
1186+
expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
1187+
self.assertEqual(result, expected)
1188+
1189+
# test all
1190+
dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
1191+
result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5, second=5, microsecond=5, nanosecond=5)
1192+
expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
1193+
self.assertEqual(result, expected)
1194+
1195+
# error
1196+
def f():
1197+
dt.replace(foo=5)
1198+
self.assertRaises(ValueError, f)
1199+
1200+
def f():
1201+
dt.replace(hour=0.1)
1202+
self.assertRaises(ValueError, f)
1203+
1204+
# assert conversion to naive is the same as replacing tzinfo with None
1205+
dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern')
1206+
self.assertEqual(dt.tz_localize(None), dt.replace(tzinfo=None))
1207+
11661208
def test_index_equals_with_tz(self):
11671209
left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
11681210
right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern')

pandas/tslib.pyx

+80-15
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ except NameError: # py3
9898
cdef inline object create_timestamp_from_ts(
9999
int64_t value, pandas_datetimestruct dts,
100100
object tz, object freq):
101+
""" convenience routine to construct a Timestamp from its parts """
101102
cdef _Timestamp ts_base
102103
ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month,
103104
dts.day, dts.hour, dts.min,
@@ -112,6 +113,7 @@ cdef inline object create_timestamp_from_ts(
112113
cdef inline object create_datetime_from_ts(
113114
int64_t value, pandas_datetimestruct dts,
114115
object tz, object freq):
116+
""" convenience routine to construct a datetime.datetime from its parts """
115117
return datetime(dts.year, dts.month, dts.day, dts.hour,
116118
dts.min, dts.sec, dts.us, tz)
117119

@@ -378,7 +380,6 @@ class Timestamp(_Timestamp):
378380
# Mixing pydatetime positional and keyword arguments is forbidden!
379381

380382
cdef _TSObject ts
381-
cdef _Timestamp ts_base
382383

383384
if offset is not None:
384385
# deprecate offset kwd in 0.19.0, GH13593
@@ -412,17 +413,7 @@ class Timestamp(_Timestamp):
412413
from pandas.tseries.frequencies import to_offset
413414
freq = to_offset(freq)
414415

415-
# make datetime happy
416-
ts_base = _Timestamp.__new__(cls, ts.dts.year, ts.dts.month,
417-
ts.dts.day, ts.dts.hour, ts.dts.min,
418-
ts.dts.sec, ts.dts.us, ts.tzinfo)
419-
420-
# fill out rest of data
421-
ts_base.value = ts.value
422-
ts_base.freq = freq
423-
ts_base.nanosecond = ts.dts.ps / 1000
424-
425-
return ts_base
416+
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
426417

427418
def _round(self, freq, rounder):
428419

@@ -660,8 +651,80 @@ class Timestamp(_Timestamp):
660651
astimezone = tz_convert
661652

662653
def replace(self, **kwds):
663-
return Timestamp(datetime.replace(self, **kwds),
664-
freq=self.freq)
654+
"""
655+
implements datetime.replace, handles nanoseconds
656+
657+
Parameters
658+
----------
659+
kwargs: key-value dict
660+
661+
accepted keywords are:
662+
year, month, day, hour, minute, second, microsecond, nanosecond, tzinfo
663+
664+
values must be integer, or for tzinfo, a tz-convertible
665+
666+
Returns
667+
-------
668+
Timestamp with fields replaced
669+
"""
670+
671+
cdef:
672+
pandas_datetimestruct dts
673+
int64_t value
674+
object tzinfo, result, k, v
675+
_TSObject ts
676+
677+
# set to naive if needed
678+
tzinfo = self.tzinfo
679+
value = self.value
680+
if tzinfo is not None:
681+
value = tz_convert_single(value, 'UTC', tzinfo)
682+
683+
# setup components
684+
pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts)
685+
dts.ps = self.nanosecond * 1000
686+
687+
# replace
688+
def validate(k, v):
689+
""" validate integers """
690+
if not isinstance(v, int):
691+
raise ValueError("value must be an integer, received {v} for {k}".format(v=type(v), k=k))
692+
return int(v)
693+
694+
for k, v in kwds.items():
695+
if k == 'year':
696+
dts.year = validate(k, v)
697+
elif k == 'month':
698+
dts.month = validate(k, v)
699+
elif k == 'day':
700+
dts.day = validate(k, v)
701+
elif k == 'hour':
702+
dts.hour = validate(k, v)
703+
elif k == 'minute':
704+
dts.min = validate(k, v)
705+
elif k == 'second':
706+
dts.sec = validate(k, v)
707+
elif k == 'microsecond':
708+
dts.us = validate(k, v)
709+
elif k == 'nanosecond':
710+
dts.ps = validate(k, v) * 1000
711+
elif k == 'tzinfo':
712+
tzinfo = v
713+
else:
714+
raise ValueError("invalid name {} passed".format(k))
715+
716+
# reconstruct & check bounds
717+
value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
718+
if value != NPY_NAT:
719+
_check_dts_bounds(&dts)
720+
721+
# set tz if needed
722+
if tzinfo is not None:
723+
value = tz_convert_single(value, tzinfo, 'UTC')
724+
725+
result = create_timestamp_from_ts(value, dts, tzinfo, self.freq)
726+
727+
return result
665728

666729
def isoformat(self, sep='T'):
667730
base = super(_Timestamp, self).isoformat(sep=sep)
@@ -5041,7 +5104,9 @@ cpdef normalize_date(object dt):
50415104
-------
50425105
normalized : datetime.datetime or Timestamp
50435106
"""
5044-
if PyDateTime_Check(dt):
5107+
if is_timestamp(dt):
5108+
return dt.replace(hour=0, minute=0, second=0, microsecond=0, nanosecond=0)
5109+
elif PyDateTime_Check(dt):
50455110
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
50465111
elif PyDate_Check(dt):
50475112
return datetime(dt.year, dt.month, dt.day)

0 commit comments

Comments
 (0)