BUG: Merge timezone aware data with DST #22825
Changes from 2 commits
@@ -277,7 +277,7 @@ def _evaluate_compare(self, other, op):
         except TypeError:
             return result

-    def _ensure_localized(self, result, ambiguous='raise'):
+    def _ensure_localized(self, result, ambiguous='raise', from_utc=False):
         """
         ensure that we are re-localized
@@ -289,6 +289,7 @@ def _ensure_localized(self, result, ambiguous='raise'):
         result : DatetimeIndex / i8 ndarray
         ambiguous : str, bool, or bool-ndarray
             default 'raise'
+        from_utc : bool

Review comment: add the default here

Review comment: can you add what this actually means (and similarly for to_utc below). Also maybe rename 'result' to array or input or something; I think these are holdover names and are confusing.

         Returns
         -------
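For reference, one way the new docstring entry could read once both review comments are addressed; the wording below is only a suggestion and is not part of the diff:

    from_utc : bool, default False
        If True, treat the passed i8 values as UTC instants and convert
        them to ``self.tz``; if False, localize them to ``self.tz``
        directly, resolving DST-ambiguous wall times via `ambiguous`.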
@@ -299,7 +300,10 @@ def _ensure_localized(self, result, ambiguous='raise'):
         if getattr(self, 'tz', None) is not None:
             if not isinstance(result, ABCIndexClass):
                 result = self._simple_new(result)
-            result = result.tz_localize(self.tz, ambiguous=ambiguous)
+            if from_utc:
+                result = result.tz_localize('UTC').tz_convert(self.tz)
+            else:
+                result = result.tz_localize(self.tz, ambiguous=ambiguous)
         return result

     def _box_values_as_index(self):
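A standalone sketch (not taken from the diff) of why the from_utc path matters around a DST transition: the i8 values behind a tz-aware index are stored relative to UTC, so localizing to UTC and then converting recovers the original instants, whereas re-localizing the same values as wall times in the target zone can hit the repeated fall-back hour. Assumes a pandas build with the Europe/Madrid zone available.

    import pandas as pd

    # tz-aware hours spanning the 2017-10-29 fall-back in Europe/Madrid
    idx = pd.date_range('2017-10-29 01:00', periods=4, freq='H',
                        tz='Europe/Madrid')
    i8 = idx.asi8                      # int64 nanoseconds, UTC-based
    naive = pd.to_datetime(i8)         # naive timestamps, still UTC instants

    # from_utc=True path: unambiguous round-trip back to the original index
    restored = naive.tz_localize('UTC').tz_convert('Europe/Madrid')
    assert restored.equals(idx)

    # The direct-localization path treats the same values as Madrid wall
    # times; here that hits the ambiguous repeated hour and raises.
    try:
        naive.tz_localize('Europe/Madrid')
    except Exception as err:           # pytz AmbiguousTimeError
        print(type(err).__name__)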
@@ -622,11 +626,11 @@ def repeat(self, repeats, *args, **kwargs):

     @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
     def where(self, cond, other=None):
-        other = _ensure_datetimelike_to_i8(other)
-        values = _ensure_datetimelike_to_i8(self)
+        other = _ensure_datetimelike_to_i8(other, to_utc=True)
+        values = _ensure_datetimelike_to_i8(self, to_utc=True)
         result = np.where(cond, values, other).astype('i8')

-        result = self._ensure_localized(result)
+        result = self._ensure_localized(result, from_utc=True)
         return self._shallow_copy(result,
                                   **self._get_attributes_dict())
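As a usage-level sketch of the code path this hunk changes (behaviour described per the linked report, not output captured from this branch): calling where on a tz-aware index that spans the fall-back previously round-tripped the values through wall-clock localization; with to_utc=True/from_utc=True the mask is applied to UTC-based i8 values and the result is converted back afterwards.

    import pandas as pd

    idx = pd.date_range('2017-10-29 01:00', periods=4, freq='H',
                        tz='Europe/Madrid')
    cond = pd.Index([True, False, True, False])
    other = idx + pd.Timedelta(hours=1)   # arbitrary replacement values

    # Rebuilt from unambiguous UTC instants, then converted to Europe/Madrid
    result = idx.where(cond, other)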
@@ -695,14 +699,17 @@ def astype(self, dtype, copy=True):
         return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)


-def _ensure_datetimelike_to_i8(other):
+def _ensure_datetimelike_to_i8(other, to_utc=False):
     """ helper for coercing an input scalar or array to i8 """

Review comment: can you add a doc-string here

     if is_scalar(other) and isna(other):
         other = iNaT
     elif isinstance(other, ABCIndexClass):
         # convert tz if needed
         if getattr(other, 'tz', None) is not None:
-            other = other.tz_localize(None).asi8
+            if to_utc:
+                other = other.tz_convert('UTC').asi8

Review comment: can you do the .asi8 lower? (and just do it on other) to avoid repeating code

+            else:
+                other = other.tz_localize(None).asi8
         else:
             other = other.asi8
     else:
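To make the two branches of the helper concrete (an illustrative sketch, not code from the PR): for a tz-aware index crossing the fall-back, the UTC-based i8 values remain distinct, while the wall-clock values produced by tz_localize(None) repeat the ambiguous hour.

    import pandas as pd

    idx = pd.date_range('2017-10-29 01:00', periods=4, freq='H',
                        tz='Europe/Madrid')

    utc_i8 = idx.tz_convert('UTC').asi8    # to_utc=True branch
    wall_i8 = idx.tz_localize(None).asi8   # to_utc=False branch

    assert len(set(utc_i8)) == 4           # four distinct instants
    assert len(set(wall_i8)) == 3          # the 02:00 wall time repeats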
@@ -590,11 +590,9 @@ def test_where_series_datetime64(self, fill_val, exp_dtype):
                         pd.Timestamp('2011-01-03'), values[3]])
        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

-    @pytest.mark.parametrize("fill_val,exp_dtype", [
-        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
-        (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)],
-        ids=['datetime64', 'datetime64tz'])
-    def test_where_index_datetime(self, fill_val, exp_dtype):
+    def test_where_index_datetime(self):
+        fill_val = pd.Timestamp('2012-01-01')
+        exp_dtype = 'datetime64[ns]'
         obj = pd.Index([pd.Timestamp('2011-01-01'),
                         pd.Timestamp('2011-01-02'),
                         pd.Timestamp('2011-01-03'),
@@ -613,13 +611,32 @@ def test_where_index_datetime(self, fill_val, exp_dtype):
                         pd.Timestamp('2011-01-03'),
                         pd.Timestamp('2012-01-04')])

-        if fill_val.tz:
-            self._assert_where_conversion(obj, cond, values, exp,
-                                          'datetime64[ns]')
-            pytest.xfail("ToDo: do not ignore timezone, must be object")
         self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
         pytest.xfail("datetime64 + datetime64 -> datetime64 must support"
                      " scalar")

+    @pytest.mark.xfail(reason="ToDo: do not ignore timezone, must be object")
+    def test_where_index_datetimetz(self):

Author comment: This test was bizarrely structured above. It was given the wrong arguments for everything to pass before it hit the xfail... Good news is that one of the cases is passing, so I am splitting the tz-aware and tz-naive case and properly xfailing the tz-aware case (which was already failing on master and is orthogonal to this change).

Review comment: is there an issue for this case? If not, can you create one and reference it here?

+        fill_val = pd.Timestamp('2012-01-01', tz='US/Eastern')
+        exp_dtype = np.object
+        obj = pd.Index([pd.Timestamp('2011-01-01'),
+                        pd.Timestamp('2011-01-02'),
+                        pd.Timestamp('2011-01-03'),
+                        pd.Timestamp('2011-01-04')])
+        assert obj.dtype == 'datetime64[ns]'
+        cond = pd.Index([True, False, True, False])
+
+        msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
+               "of some kind")
+        with tm.assert_raises_regex(TypeError, msg):
+            obj.where(cond, fill_val)
+
+        values = pd.Index(pd.date_range(fill_val, periods=4))
+        exp = pd.Index([pd.Timestamp('2011-01-01'),
+                        pd.Timestamp('2012-01-02', tz='US/Eastern'),
+                        pd.Timestamp('2011-01-03'),
+                        pd.Timestamp('2012-01-04', tz='US/Eastern')],
+                       dtype=exp_dtype)
+
+        self._assert_where_conversion(obj, cond, values, exp, exp_dtype)

     def test_where_index_complex128(self):
         pass
@@ -601,6 +601,30 @@ def test_merge_on_datetime64tz(self):
         assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]'
         assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]'

+    def test_merge_datetime64tz_with_dst_transition(self):
+        # GH 18885
+        df1 = pd.DataFrame(pd.date_range(
+            '2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'),
+            columns=['date'])
+        df1['value'] = 1
+        df2 = pd.DataFrame([
+            pd.to_datetime('2017-10-29 03:00:00'),
+            pd.to_datetime('2017-10-29 04:00:00'),
+            pd.to_datetime('2017-10-29 05:00:00')

Review comment: can you create this one as well with date_range to shorten it a bit? (and if not, I think it is more typical to pass a list of strings to

Review comment: you can maybe also directly include the 'value' column in the constructor. (A sketch folding in both suggestions follows after this hunk.)

+        ],
+            columns=['date'])
+        df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert(
+            'Europe/Madrid')
+        df2['value'] = 2
+        result = pd.merge(df1, df2, how='outer', on='date')
+        expected = pd.DataFrame({
+            'date': pd.date_range(
+                '2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'),
+            'value_x': [1] * 4 + [np.nan] * 3,
+            'value_y': [np.nan] * 4 + [2] * 3
+        })
+        assert_frame_equal(result, expected)

     def test_merge_non_unique_period_index(self):
         # GH #16871
         index = pd.period_range('2016-01-01', periods=16, freq='M')
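For readers without the test harness, the scenario exercised by the new test can be reproduced standalone. The construction below also folds in the two review suggestions (build df2 from a date_range and pass the 'value' column in the constructor); it is a sketch of the situation reported in GH 18885, not code taken from the diff.

    import pandas as pd

    # Hours crossing the 2017-10-29 DST fall-back in Europe/Madrid
    df1 = pd.DataFrame({
        'date': pd.date_range('2017-10-29 01:00', periods=4, freq='H',
                              tz='Europe/Madrid'),
        'value': 1,
    })
    df2 = pd.DataFrame({
        'date': pd.date_range('2017-10-29 03:00', periods=3, freq='H',
                              tz='UTC').tz_convert('Europe/Madrid'),
        'value': 2,
    })

    # Outer merge on the tz-aware key: with the fix the keys from both
    # frames align on the same instants (seven consecutive hours) instead
    # of being re-localized as wall times.
    result = pd.merge(df1, df2, how='outer', on='date')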
Review comment: can you move to reshaping