Skip to content

BUG: fix tz-aware DatetimeIndex + TimedeltaIndex (#17558) #17583

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ Indexing
- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)
- Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`)
- Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`)
- Bug in adding tz-aware ``DatetimeIndex`` with a numpy array of ``timedelta64``s, and a bug in adding tz-aware ``DatetimeIndex`` with ``TimedeltaIndex`` (:issue:`17558`)

I/O
^^^
Expand Down
19 changes: 17 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
is_integer, is_float,
is_bool_dtype, _ensure_int64,
is_scalar, is_dtype_equal,
is_list_like)
is_timedelta64_dtype, is_datetime64tz_dtype,
is_integer_dtype, is_list_like)
from pandas.core.dtypes.generic import (
ABCIndex, ABCSeries,
ABCPeriodIndex, ABCIndexClass)
Expand Down Expand Up @@ -651,6 +652,18 @@ def __add__(self, other):
raise TypeError("cannot add {typ1} and {typ2}"
.format(typ1=type(self).__name__,
typ2=type(other).__name__))
elif isinstance(other, np.ndarray):
if is_timedelta64_dtype(other):
return self._add_delta(other)
elif is_integer_dtype(other):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think instead of this, we want to specifically define add in PeriodIndex which handles this case and calls super otherwise.

# for internal use only:
# allow PeriodIndex + np.array(int64) to
# fallthrough to ufunc operator
return NotImplemented
else:
raise TypeError("cannot add {typ1} and np.ndarray[{typ2}]"
.format(typ1=type(self).__name__,
typ2=other.dtype))
elif isinstance(other, (DateOffset, timedelta, np.timedelta64,
Timedelta)):
return self._add_delta(other)
Expand Down Expand Up @@ -717,7 +730,7 @@ def _add_delta_td(self, other):

def _add_delta_tdi(self, other):
# add a delta of a TimedeltaIndex
# return the i8 result view
# return the i8 result view for datetime64tz

# delta operation
if not len(self) == len(other):
Expand All @@ -731,6 +744,8 @@ def _add_delta_tdi(self, other):
if self.hasnans or other.hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = iNaT
if is_datetime64tz_dtype(self.dtype):
return new_values.view('i8')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, see my comments

return new_values.view(self.dtype)

def isin(self, values):
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
is_integer, is_float,
is_integer_dtype,
is_datetime64_ns_dtype,
is_timedelta64_dtype,
is_period_dtype,
is_bool_dtype,
is_string_dtype,
Expand Down Expand Up @@ -801,6 +802,8 @@ def _add_delta(self, delta):

if isinstance(delta, (Tick, timedelta, np.timedelta64)):
new_values = self._add_delta_td(delta)
elif isinstance(delta, np.ndarray) and is_timedelta64_dtype(delta):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should simply be incorporated into the case right below

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

by below, do I understand you correctly that we can consolidate as:

if isinstance(delta, (Tick, timedelta, np.timedelta64, np.ndarray)):
    new_values = self._add_delta_td(delta)
elif ..:

or are you suggesting to wrap delta into a TimedeltaIndex?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes
but i think we need to check the dtype as well if it's a numpy array (can be separate from this)

new_values = self._add_delta_td(delta)
elif isinstance(delta, TimedeltaIndex):
new_values = self._add_delta_tdi(delta)
# update name when delta is Index
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ def _maybe_convert_timedelta(self, other):
return other.n
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
raise IncompatibleFrequency(msg)
elif isinstance(other, np.ndarray):
elif isinstance(other, (np.ndarray, TimedeltaIndex)):
if is_integer_dtype(other):
return other
elif is_timedelta64_dtype(other):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,11 @@ def _maybe_update_attributes(self, attrs):
return attrs

def _add_delta(self, delta):
name = self.name
if isinstance(delta, (Tick, timedelta, np.timedelta64)):
new_values = self._add_delta_td(delta)
name = self.name
elif isinstance(delta, np.ndarray) and is_timedelta64_dtype(delta):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment

new_values = self._add_delta_td(delta)
elif isinstance(delta, TimedeltaIndex):
new_values = self._add_delta_tdi(delta)
# update name when delta is index
Expand Down
32 changes: 31 additions & 1 deletion pandas/tests/indexes/datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
""" generic datetimelike tests """

from .common import Base
import numpy as np
import pandas as pd
import pandas.util.testing as tm

from .common import Base


class DatetimeLike(Base):

Expand Down Expand Up @@ -38,3 +41,30 @@ def test_view(self):
i_view = i.view(self._holder)
result = self._holder(i)
tm.assert_index_equal(result, i_view)

def test_add_timedelta(self):
# GH 17558
# Check that tz-aware DatetimeIndex + np.array(dtype="timedelta64")
# and DatetimeIndex + TimedeltaIndex work as expected
idx = self.create_index()
idx.name = "x"
if isinstance(idx, pd.DatetimeIndex):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't use an if like this.

let's create a sub-class that specifically tests tz-aware (so .create_index()) works correctly. (add in datetimes/test_datetimelikes)

Copy link
Author

@azjps azjps Sep 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally agree with this, but unfortunately a tz-aware-based subclass fails a whole bunch of other common tests, so the scope of this PR would have to be expanded further (for which I'd probably need help). Here's one example to illustrate, basically DatetimeIndex constructor will read its input list into a (tz-naive) numpy array and "re-tz_localize" based on dtype's timezone:

    def test_repr_roundtrip(self):
    
        idx = self.create_index()
>       tm.assert_index_equal(eval(repr(idx)), idx)

>       raise AssertionError(msg)
E       AssertionError: Index are different
E       
E       Index values are different (100.0 %)
E       [left]:  DatetimeIndex(['2013-01-01 05:00:00-05:00', '2013-01-02 05:00:00-05:00',
E                      '2013-01-03 05:00:00-05:00', '2013-01-04 05:00:00-05:00',
E                      '2013-01-05 05:00:00-05:00'],
E                     dtype='datetime64[ns, US/Eastern]', freq='D')
E       [right]: DatetimeIndex(['2013-01-01 00:00:00-05:00', '2013-01-02 00:00:00-05:00',
E                      '2013-01-03 00:00:00-05:00', '2013-01-04 00:00:00-05:00',
E                      '2013-01-05 00:00:00-05:00'],
E                     dtype='datetime64[ns, US/Eastern]', freq='D')

(Pdb) repr(idx)  # broke up output for readability
"DatetimeIndex(['2013-01-01 00:00:00-05:00', '2013-01-02 00:00:00-05:00',\n"
"               '2013-01-03 00:00:00-05:00', '2013-01-04 00:00:00-05:00',\n"
"               '2013-01-05 00:00:00-05:00'],\n"
"              dtype='datetime64[ns, US/Eastern]', freq='D')"
(Pdb) eval(repr(idx))
DatetimeIndex(['2013-01-01 05:00:00-05:00', '2013-01-02 05:00:00-05:00',
               '2013-01-03 05:00:00-05:00', '2013-01-04 05:00:00-05:00',
               '2013-01-05 05:00:00-05:00'],
              dtype='datetime64[ns, US/Eastern]', freq='D')

Most of the other test failures can also be treated as variants of this, as these tests also construct a separate DatetimeIndex instance without tz information, although its possible to (say) modify those tests directly to tz_localize to UTC and then tz_convert to appropriate timezone if create_index().tz is available.

Also the tests related to union, intersection, symmetric_difference, etc try to do these operations against the underlying numpy .values which are tz-naive, leading to issues. We maybe could get around these by just disabling that part of the test.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I think the right way to do this is to do another PR which separates out the DTI tests to 2 classes for tz-naive and tz-aware and modifies create_index as appropriate. If there are failing tests we can look closer. Want to start on that?

idx = idx.tz_localize("US/Eastern")

expected = idx + np.timedelta64(1, 'D')
tm.assert_index_equal(idx, expected - np.timedelta64(1, 'D'))

deltas = np.array([np.timedelta64(1, 'D')] * len(idx),
dtype="timedelta64[ns]")
results = [idx + deltas, # add numpy array
idx + deltas.astype(dtype="timedelta64[m]"),
idx + pd.TimedeltaIndex(deltas, name=idx.name),
idx + pd.to_timedelta(deltas[0]),
]
for actual in results:
tm.assert_index_equal(actual, expected)

errmsg = (r"cannot add {cls} and np.ndarray\[float64\]"
.format(cls=idx.__class__.__name__))
with tm.assert_raises_regex(TypeError, errmsg):
idx + np.array([0.1], dtype=np.float64)
24 changes: 24 additions & 0 deletions pandas/tests/indexes/datetimes/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,27 @@ def test_union(self):
for case in cases:
result = first.union(case)
assert tm.equalContents(result, everything)

def test_add_dti_td(self):
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is almost the same as special case of the above test in datetimelike.py for DatetimeIndex, if that test looks good I will remove this one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see my comments above you can simply re-define create_index to make this work (in another test class)

# GH 17558
# Check that tz-aware DatetimeIndex + np.array(dtype="timedelta64")
# and DatetimeIndex + TimedeltaIndex work as expected
dti = pd.DatetimeIndex([pd.Timestamp("2017/01/01")],
name="x").tz_localize('US/Eastern')

expected = pd.DatetimeIndex([pd.Timestamp("2017/01/01 01:00")],
name="x").tz_localize('US/Eastern')

td_np = np.array([np.timedelta64(1, 'h')], dtype="timedelta64[ns]")
results = [dti + td_np, # add numpy array
dti + td_np.astype(dtype="timedelta64[m]"),
dti + pd.TimedeltaIndex(td_np, name=dti.name),
dti + td_np[0], # add timedelta scalar
dti + pd.to_timedelta(td_np[0]),
]
for actual in results:
tm.assert_index_equal(actual, expected)

errmsg = r"cannot add DatetimeIndex and np.ndarray\[float64\]"
with tm.assert_raises_regex(TypeError, errmsg):
dti + np.array([0.1], dtype=np.float64)