-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Merge timezone aware data with DST #22825
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
ce784e0
52602b9
cff2bca
cc5e9cf
da257a3
c9cc43a
9eca40e
254af4e
26b0600
f5c3083
e4491d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -277,7 +277,7 @@ def _evaluate_compare(self, other, op): | |
except TypeError: | ||
return result | ||
|
||
def _ensure_localized(self, result, ambiguous='raise'): | ||
def _ensure_localized(self, arg, ambiguous='raise', from_utc=False): | ||
""" | ||
ensure that we are re-localized | ||
|
||
|
@@ -286,9 +286,13 @@ def _ensure_localized(self, result, ambiguous='raise'): | |
|
||
Parameters | ||
---------- | ||
result : DatetimeIndex / i8 ndarray | ||
arg : DatetimeIndex / i8 ndarray | ||
ambiguous : str, bool, or bool-ndarray | ||
default 'raise' | ||
from_utc : bool | ||
default False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. default goes on the line before. |
||
If True, localize the i8 ndarray to UTC first before converting to | ||
the appropriate tz. If False, localize directly to the tz. | ||
|
||
Returns | ||
------- | ||
|
@@ -297,10 +301,13 @@ def _ensure_localized(self, result, ambiguous='raise'): | |
|
||
# reconvert to local tz | ||
if getattr(self, 'tz', None) is not None: | ||
if not isinstance(result, ABCIndexClass): | ||
result = self._simple_new(result) | ||
result = result.tz_localize(self.tz, ambiguous=ambiguous) | ||
return result | ||
if not isinstance(arg, ABCIndexClass): | ||
arg = self._simple_new(arg) | ||
if from_utc: | ||
arg = arg.tz_localize('UTC').tz_convert(self.tz) | ||
else: | ||
arg = arg.tz_localize(self.tz, ambiguous=ambiguous) | ||
return arg | ||
|
||
def _box_values_as_index(self): | ||
""" | ||
|
@@ -622,11 +629,11 @@ def repeat(self, repeats, *args, **kwargs): | |
|
||
@Appender(_index_shared_docs['where'] % _index_doc_kwargs) | ||
def where(self, cond, other=None): | ||
other = _ensure_datetimelike_to_i8(other) | ||
values = _ensure_datetimelike_to_i8(self) | ||
other = _ensure_datetimelike_to_i8(other, to_utc=True) | ||
values = _ensure_datetimelike_to_i8(self, to_utc=True) | ||
result = np.where(cond, values, other).astype('i8') | ||
|
||
result = self._ensure_localized(result) | ||
result = self._ensure_localized(result, from_utc=True) | ||
return self._shallow_copy(result, | ||
**self._get_attributes_dict()) | ||
|
||
|
@@ -695,23 +702,38 @@ def astype(self, dtype, copy=True): | |
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) | ||
|
||
|
||
def _ensure_datetimelike_to_i8(other): | ||
""" helper for coercing an input scalar or array to i8 """ | ||
def _ensure_datetimelike_to_i8(other, to_utc=False): | ||
""" | ||
helper for coercing an input scalar or array to i8 | ||
|
||
Parameters | ||
---------- | ||
other : 1d array | ||
to_utc : bool | ||
default False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
If True, convert the values to UTC before extracting the i8 values | ||
If False, extract the i8 values directly. | ||
|
||
Returns | ||
------- | ||
i8 1d array | ||
""" | ||
if is_scalar(other) and isna(other): | ||
other = iNaT | ||
return iNaT | ||
elif isinstance(other, ABCIndexClass): | ||
# convert tz if needed | ||
if getattr(other, 'tz', None) is not None: | ||
other = other.tz_localize(None).asi8 | ||
else: | ||
other = other.asi8 | ||
if to_utc: | ||
other = other.tz_convert('UTC') | ||
else: | ||
other = other.tz_localize(None) | ||
else: | ||
try: | ||
other = np.array(other, copy=False).view('i8') | ||
return np.array(other, copy=False).view('i8') | ||
except TypeError: | ||
# period array cannot be coerces to int | ||
other = Index(other).asi8 | ||
return other | ||
other = Index(other) | ||
return other.asi8 | ||
|
||
|
||
def wrap_arithmetic_op(self, other, result): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -590,11 +590,9 @@ def test_where_series_datetime64(self, fill_val, exp_dtype): | |
pd.Timestamp('2011-01-03'), values[3]]) | ||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype) | ||
|
||
@pytest.mark.parametrize("fill_val,exp_dtype", [ | ||
(pd.Timestamp('2012-01-01'), 'datetime64[ns]'), | ||
(pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object)], | ||
ids=['datetime64', 'datetime64tz']) | ||
def test_where_index_datetime(self, fill_val, exp_dtype): | ||
def test_where_index_datetime(self): | ||
fill_val = pd.Timestamp('2012-01-01') | ||
exp_dtype = 'datetime64[ns]' | ||
obj = pd.Index([pd.Timestamp('2011-01-01'), | ||
pd.Timestamp('2011-01-02'), | ||
pd.Timestamp('2011-01-03'), | ||
|
@@ -613,13 +611,32 @@ def test_where_index_datetime(self, fill_val, exp_dtype): | |
pd.Timestamp('2011-01-03'), | ||
pd.Timestamp('2012-01-04')]) | ||
|
||
if fill_val.tz: | ||
self._assert_where_conversion(obj, cond, values, exp, | ||
'datetime64[ns]') | ||
pytest.xfail("ToDo: do not ignore timezone, must be object") | ||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype) | ||
pytest.xfail("datetime64 + datetime64 -> datetime64 must support" | ||
" scalar") | ||
|
||
@pytest.mark.xfail(reason="ToDo: do not ignore timezone, must be object") | ||
def test_where_index_datetimetz(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test was bizarrely structured above. It was given the wrong arguments for everything to pass before it hit the xfail... Good news is that one of the cases is passing, so I am splitting the tz-aware and tz-naive case and properly xfailing the tz-aware case (which was already failing on master and is orthogonal to this change) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there an issue for this case? if not can you create one and reference it here |
||
fill_val = pd.Timestamp('2012-01-01', tz='US/Eastern') | ||
exp_dtype = np.object | ||
obj = pd.Index([pd.Timestamp('2011-01-01'), | ||
pd.Timestamp('2011-01-02'), | ||
pd.Timestamp('2011-01-03'), | ||
pd.Timestamp('2011-01-04')]) | ||
assert obj.dtype == 'datetime64[ns]' | ||
cond = pd.Index([True, False, True, False]) | ||
|
||
msg = ("Index\\(\\.\\.\\.\\) must be called with a collection " | ||
"of some kind") | ||
with tm.assert_raises_regex(TypeError, msg): | ||
obj.where(cond, fill_val) | ||
|
||
values = pd.Index(pd.date_range(fill_val, periods=4)) | ||
exp = pd.Index([pd.Timestamp('2011-01-01'), | ||
pd.Timestamp('2012-01-02', tz='US/Eastern'), | ||
pd.Timestamp('2011-01-03'), | ||
pd.Timestamp('2012-01-04', tz='US/Eastern')], | ||
dtype=exp_dtype) | ||
|
||
self._assert_where_conversion(obj, cond, values, exp, exp_dtype) | ||
|
||
def test_where_index_complex128(self): | ||
pass | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -601,6 +601,30 @@ def test_merge_on_datetime64tz(self): | |
assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' | ||
assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' | ||
|
||
def test_merge_datetime64tz_with_dst_transition(self): | ||
# GH 18885 | ||
df1 = pd.DataFrame(pd.date_range( | ||
'2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'), | ||
columns=['date']) | ||
df1['value'] = 1 | ||
df2 = pd.DataFrame([ | ||
pd.to_datetime('2017-10-29 03:00:00'), | ||
pd.to_datetime('2017-10-29 04:00:00'), | ||
pd.to_datetime('2017-10-29 05:00:00') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you create this one as well with date_range to shorten it a bit? (and if not, I think it is more typical to pass a list of strings to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can maybe also directly include the 'value' column in the constructor |
||
], | ||
columns=['date']) | ||
df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert( | ||
'Europe/Madrid') | ||
df2['value'] = 2 | ||
result = pd.merge(df1, df2, how='outer', on='date') | ||
expected = pd.DataFrame({ | ||
'date': pd.date_range( | ||
'2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'), | ||
'value_x': [1] * 4 + [np.nan] * 3, | ||
'value_y': [np.nan] * 4 + [2] * 3 | ||
}) | ||
assert_frame_equal(result, expected) | ||
|
||
def test_merge_non_unique_period_index(self): | ||
# GH #16871 | ||
index = pd.period_range('2016-01-01', periods=16, freq='M') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add the default here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you add what this actually means (and similarly for to_utc below).
also maybe rename 'result' to array or input or something, I think these are holdover names and are confusing.