-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: DateOffset weekday around DST produces unexpected results. fixes #5175 #5292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3104,6 +3104,134 @@ def test_str_for_named_is_name(self): | |
self.assertEqual(str(offset), name) | ||
|
||
|
||
def get_utc_offset_hours(ts): | ||
# take a Timestamp and compute total hours of utc offset | ||
o = ts.utcoffset() | ||
return (o.days * 24 * 3600 + o.seconds) / 3600.0 | ||
|
||
|
||
class TestDST(tm.TestCase): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback - want to have a look at these test cases? Went with a limited set of all of the possible There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be a sub-class of e.g.
|
||
""" | ||
test DateOffset additions over Daylight Savings Time | ||
""" | ||
# one microsecond before the DST transition | ||
ts_pre_fallback = "2013-11-03 01:59:59.999999" | ||
ts_pre_springfwd = "2013-03-10 01:59:59.999999" | ||
|
||
# test both basic names and dateutil timezones | ||
timezone_utc_offsets = { | ||
'US/Eastern': dict( | ||
utc_offset_daylight=-4, | ||
utc_offset_standard=-5, | ||
), | ||
'dateutil/US/Pacific': dict( | ||
utc_offset_daylight=-7, | ||
utc_offset_standard=-8, | ||
) | ||
} | ||
valid_date_offsets_singular = [ | ||
'weekday', 'day', 'hour', 'minute', 'second', 'microsecond' | ||
] | ||
valid_date_offsets_plural = [ | ||
'weeks', 'days', | ||
'hours', 'minutes', 'seconds', | ||
'milliseconds', 'microseconds' | ||
] | ||
|
||
def _test_all_offsets(self, n, **kwds): | ||
valid_offsets = self.valid_date_offsets_plural if n > 1 \ | ||
else self.valid_date_offsets_singular | ||
|
||
for name in valid_offsets: | ||
self._test_offset(offset_name=name, offset_n=n, **kwds) | ||
|
||
def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): | ||
offset = DateOffset(**{offset_name: offset_n}) | ||
t = tstart + offset | ||
if expected_utc_offset is not None: | ||
self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset) | ||
|
||
if offset_name == 'weeks': | ||
# dates should match | ||
self.assertTrue( | ||
t.date() == | ||
timedelta(days=7 * offset.kwds['weeks']) + tstart.date() | ||
) | ||
# expect the same day of week, hour of day, minute, second, ... | ||
self.assertTrue( | ||
t.dayofweek == tstart.dayofweek and | ||
t.hour == tstart.hour and | ||
t.minute == tstart.minute and | ||
t.second == tstart.second | ||
) | ||
elif offset_name == 'days': | ||
# dates should match | ||
self.assertTrue(timedelta(offset.kwds['days']) + tstart.date() == t.date()) | ||
# expect the same hour of day, minute, second, ... | ||
self.assertTrue( | ||
t.hour == tstart.hour and | ||
t.minute == tstart.minute and | ||
t.second == tstart.second | ||
) | ||
elif offset_name in self.valid_date_offsets_singular: | ||
# expect the signular offset value to match between tstart and t | ||
datepart_offset = getattr(t, offset_name if offset_name != 'weekday' else 'dayofweek') | ||
self.assertTrue(datepart_offset == offset.kwds[offset_name]) | ||
else: | ||
# the offset should be the same as if it was done in UTC | ||
self.assertTrue( | ||
t == (tstart.tz_convert('UTC') + offset).tz_convert('US/Pacific') | ||
) | ||
|
||
def _make_timestamp(self, string, hrs_offset, tz): | ||
offset_string = '{hrs:02d}00'.format(hrs=hrs_offset) if hrs_offset >= 0 else \ | ||
'-{hrs:02d}00'.format(hrs=-1 * hrs_offset) | ||
return Timestamp(string + offset_string).tz_convert(tz) | ||
|
||
def test_fallback_plural(self): | ||
"""test moving from daylight savings to standard time""" | ||
for tz, utc_offsets in self.timezone_utc_offsets.items(): | ||
hrs_pre = utc_offsets['utc_offset_daylight'] | ||
hrs_post = utc_offsets['utc_offset_standard'] | ||
self._test_all_offsets( | ||
n=3, | ||
tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), | ||
expected_utc_offset=hrs_post | ||
) | ||
|
||
def test_springforward_plural(self): | ||
"""test moving from standard to daylight savings""" | ||
for tz, utc_offsets in self.timezone_utc_offsets.items(): | ||
hrs_pre = utc_offsets['utc_offset_standard'] | ||
hrs_post = utc_offsets['utc_offset_daylight'] | ||
self._test_all_offsets( | ||
n=3, | ||
tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), | ||
expected_utc_offset=hrs_post | ||
) | ||
|
||
def test_fallback_singular(self): | ||
# in the case of signular offsets, we dont neccesarily know which utc offset | ||
# the new Timestamp will wind up in (the tz for 1 month may be different from 1 second) | ||
# so we don't specify an expected_utc_offset | ||
for tz, utc_offsets in self.timezone_utc_offsets.items(): | ||
hrs_pre = utc_offsets['utc_offset_standard'] | ||
self._test_all_offsets( | ||
n=1, | ||
tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), | ||
expected_utc_offset=None | ||
) | ||
|
||
def test_springforward_singular(self): | ||
for tz, utc_offsets in self.timezone_utc_offsets.items(): | ||
hrs_pre = utc_offsets['utc_offset_standard'] | ||
self._test_all_offsets( | ||
n=1, | ||
tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), | ||
expected_utc_offset=None | ||
) | ||
|
||
|
||
if __name__ == '__main__': | ||
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], | ||
exit=False) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it seems that you would use relativedelta for everything (except nanoseconds)? is that right? how does this solve the problem?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
relativedelta
is used for all singluar offsets and plural offsets greater than daily frequency. With this change these calculations are done in UTC.Also with this change,
timedelta
is now used for all plural offsets (hours, minutes, seconds) under daily frequency.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so why are
minutes,hours,seconds,microseconds
here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mintue, hour, second, ... -- singular not plural. Because you can do:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why wouldn't you do ALL calculations then using relativedelta? why is timedelta not good?
shouldn't you ALWAYS do them in UTC (e.g. convert to/from utc if their is a timezone, then back)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why don't you just tranlsate the plurals then, isn't it simpler?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
plurals and singular are not the same -- plurals behave how you would expect, but singulars are used for fixing the time part:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this seems like a bug to me, eg.
t + pd.offsets.Hour(9)
==t + pd.DateOffset(hours=9) == t + timedelta(hours=9)
(and the first is prob how most users do this). Are their tests for the former anywhere? can you point them out.Is this documented anywhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I couldn't find any tests or documentation for the singular form anywhere in pandas. However, I think this behavior is inherited from
dateutil.relativedelta
, which mentions it in passing in the documentation:There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can u put that link on the linked issue #7418