-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Avoid AmbiguousTimeError on groupby #14683
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -439,7 +439,7 @@ def _generate(cls, start, end, periods, name, offset, | |
tz = tz.localize(date.replace(tzinfo=None)).tzinfo | ||
|
||
if tz is not None and inferred_tz is not None: | ||
if not inferred_tz == tz: | ||
if not tslib.get_timezone(inferred_tz) == tslib.get_timezone(tz): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, this should not be necessary; str comparison already does this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I get an exception if I don't... apparently the timezone with and without dst are not the same thing:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, can you repro that directly via a DatetimeIndex construction call? does it break anything else? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, I've traced the issue to the tz of input index: I do: df1 = DataFrame([1477786980, 1477790580], columns=['ts'])
dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s').dt.tz_localize('UTC').dt.tz_convert('Europe/Madrid')) Now, this dti1 index has a funny tz attribute: That probably means I don't need this change... but the fix is needed elsewhere. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, my bad.... To reproduce: t1=Timestamp(2016,10,30,2,23).tz_localize('Europe/Madrid',ambiguous=True)
t2=Timestamp(2016,10,30,2,13).tz_localize('Europe/Madrid',ambiguous=False)
dti=DatetimeIndex(start=t1,end=t2,freq='H',tz='Europe/Madrid') I've also found that the same check is done in pandas.tseries.tools.py: def _infer_tzinfo(start, end):
def _infer(a, b):
tz = a.tzinfo
if b and b.tzinfo:
if not (tslib.get_timezone(tz) == tslib.get_timezone(b.tzinfo)):
raise AssertionError('Inputs must both have the same timezone,'
' {0} != {1}'.format(tz, b.tzinfo))
return tz
tz = None
if start is not None:
tz = _infer(start, end)
elif end is not None:
tz = _infer(end, start)
return tz I'll submit now the changes and see if this time they are ok. |
||
raise AssertionError("Inferred time zone not equal to passed " | ||
"time zone") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1283,9 +1283,18 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): | |
# | ||
# See https://github.com/pandas-dev/pandas/issues/8683 | ||
|
||
# 14682 - Since we need to drop the TZ information to perform | ||
# the adjustment in the presence of a DST change, | ||
# save TZ Info and the DST state of the first and last parameters | ||
# so that we can accurately rebuild them at the end. | ||
first_tzinfo = first.tzinfo | ||
last_tzinfo = last.tzinfo | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. put the issue number as a comment and a brief expl. future readers will appreciate! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will |
||
first_dst = bool(first.dst()) | ||
last_dst = bool(last.dst()) | ||
|
||
first = first.tz_localize(None) | ||
last = last.tz_localize(None) | ||
|
||
start_day_nanos = first.normalize().value | ||
|
||
base_nanos = (base % offset.n) * offset.nanos // offset.n | ||
|
@@ -1320,11 +1329,8 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): | |
else: | ||
lresult = last.value + offset.nanos | ||
|
||
# return (Timestamp(fresult, tz=first.tz), | ||
# Timestamp(lresult, tz=last.tz)) | ||
|
||
return (Timestamp(fresult).tz_localize(first_tzinfo), | ||
Timestamp(lresult).tz_localize(first_tzinfo)) | ||
return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst), | ||
Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst)) | ||
|
||
|
||
def asfreq(obj, freq, method=None, how=None, normalize=False): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1912,7 +1912,31 @@ def test_resample_size(self): | |
right = Series(val, index=ix) | ||
assert_series_equal(left, right) | ||
|
||
def test_resmaple_dst_anchor(self): | ||
def test_resample_across_dst(self): | ||
# The test resamples a DatetimeIndex with values before and after a | ||
# DST change | ||
# Issue: 14682 | ||
|
||
# The DatetimeIndex we will start with | ||
# (note that DST happens at 03:00+02:00 -> 02:00+01:00) | ||
# 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 | ||
df1 = DataFrame([1477786980, 1477790580], columns=['ts']) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. give a 1-liner about what you are testing here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One question... where's the whatsnew file I need to add the description to? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a bug, so you can put in 0.19.2 |
||
dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s') | ||
.dt.tz_localize('UTC') | ||
.dt.tz_convert('Europe/Madrid')) | ||
|
||
# The expected DatetimeIndex after resampling. | ||
# 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 | ||
df2 = DataFrame([1477785600, 1477789200], columns=['ts']) | ||
dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s') | ||
.dt.tz_localize('UTC') | ||
.dt.tz_convert('Europe/Madrid')) | ||
df = DataFrame([5, 5], index=dti1) | ||
assert_frame_equal( | ||
df.resample(rule='H').sum(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
DataFrame([5, 5], index=dti2)) | ||
|
||
def test_resample_dst_anchor(self): | ||
# 5172 | ||
dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') | ||
df = DataFrame([5], index=dti) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bug in
Ambiguous....
(use double-backticks around exception types andDatetimeIndex
)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll change