Skip to content

Commit bda1567

Browse files
committed
BUG: Support IntervalIndex TZ Aware
1 parent 32f562d commit bda1567

File tree

5 files changed

+161
-65
lines changed

5 files changed

+161
-65
lines changed

doc/source/whatsnew/v0.22.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Other Enhancements
4545
- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`)
4646
- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`)
4747
- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python 3 and multiple python-qt bindings (:issue:`17722`)
48+
- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
4849

4950
.. _whatsnew_0220.api_breaking:
5051

@@ -54,7 +55,7 @@ Backwards incompatible API changes
5455
- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
5556
- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`)
5657
- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`)
57-
-
58+
- :class:`Interval` now raises a ``ValueError`` when the left and right endpoints are ``Timestamp`` objects with different time zones (:issue:`18538`)
5859

5960

6061

pandas/_libs/interval.pyx

+16-6
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ cdef class Interval(IntervalMixin):
5858
Left bound for interval.
5959
right : value
6060
Right bound for interval.
61-
closed : {'left', 'right', 'both', 'neither'}
61+
closed : {'left', 'right', 'both', 'neither'}, default 'right'
6262
Whether the interval is closed on the left-side, right-side, both or
63-
neither. Defaults to 'right'.
63+
neither.
6464
6565
Examples
6666
--------
@@ -77,8 +77,8 @@ cdef class Interval(IntervalMixin):
7777
7878
See Also
7979
--------
80-
IntervalIndex : an Index of ``interval`` s that are all closed on the same
81-
side.
80+
IntervalIndex : an Index of ``Interval`` objects that are all closed on the
81+
same side.
8282
cut, qcut : convert arrays of continuous data into categoricals/series of
8383
``Interval``.
8484
"""
@@ -91,8 +91,18 @@ cdef class Interval(IntervalMixin):
9191
# constructor (__cinit__/__new__) to avoid them
9292
if closed not in _VALID_CLOSED:
9393
raise ValueError("invalid option for 'closed': %s" % closed)
94-
if not left <= right:
95-
raise ValueError('left side of interval must be <= right side')
94+
try:
95+
if not left <= right:
96+
raise ValueError('left side of interval must be <= right side')
97+
except TypeError as e:
98+
msg = 'left and right must be comparable, {e}'.format(e=e)
99+
raise TypeError(msg)
100+
if isinstance(left, Timestamp) and left.tzinfo != right.tzinfo:
101+
# GH 18538
102+
msg = ("left and right must have the same time zone, got "
103+
"'{left_tz}' and '{right_tz}'").format(
104+
left_tz=left.tzinfo, right_tz=right.tzinfo)
105+
raise ValueError(msg)
96106
self.left = left
97107
self.right = right
98108
self.closed = closed

pandas/core/indexes/interval.py

+28-13
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
import numpy as np
44

55
from pandas.core.dtypes.missing import notna, isna
6-
from pandas.core.dtypes.generic import ABCPeriodIndex
6+
from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex
77
from pandas.core.dtypes.dtypes import IntervalDtype
88
from pandas.core.dtypes.cast import maybe_convert_platform
99
from pandas.core.dtypes.common import (
1010
_ensure_platform_int,
1111
is_list_like,
1212
is_datetime_or_timedelta_dtype,
13+
is_datetime64tz_dtype,
1314
is_integer_dtype,
1415
is_object_dtype,
1516
is_categorical_dtype,
@@ -28,7 +29,7 @@
2829
Interval, IntervalMixin, IntervalTree,
2930
intervals_to_interval_bounds)
3031

31-
from pandas.core.indexes.datetimes import date_range
32+
from pandas.core.indexes.datetimes import DatetimeIndex, date_range
3233
from pandas.core.indexes.timedeltas import timedelta_range
3334
from pandas.core.indexes.multi import MultiIndex
3435
from pandas.compat.numpy import function as nv
@@ -54,7 +55,7 @@ def _get_next_label(label):
5455
dtype = getattr(label, 'dtype', type(label))
5556
if isinstance(label, (Timestamp, Timedelta)):
5657
dtype = 'datetime64'
57-
if is_datetime_or_timedelta_dtype(dtype):
58+
if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
5859
return label + np.timedelta64(1, 'ns')
5960
elif is_integer_dtype(dtype):
6061
return label + 1
@@ -69,7 +70,7 @@ def _get_prev_label(label):
6970
dtype = getattr(label, 'dtype', type(label))
7071
if isinstance(label, (Timestamp, Timedelta)):
7172
dtype = 'datetime64'
72-
if is_datetime_or_timedelta_dtype(dtype):
73+
if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
7374
return label - np.timedelta64(1, 'ns')
7475
elif is_integer_dtype(dtype):
7576
return label - 1
@@ -228,17 +229,21 @@ def _simple_new(cls, left, right, closed=None, name=None,
228229
# coerce dtypes to match if needed
229230
if is_float_dtype(left) and is_integer_dtype(right):
230231
right = right.astype(left.dtype)
231-
if is_float_dtype(right) and is_integer_dtype(left):
232+
elif is_float_dtype(right) and is_integer_dtype(left):
232233
left = left.astype(right.dtype)
233234

234235
if type(left) != type(right):
235-
raise ValueError("must not have differing left [{}] "
236-
"and right [{}] types".format(
237-
type(left), type(right)))
238-
239-
if isinstance(left, ABCPeriodIndex):
240-
raise ValueError("Period dtypes are not supported, "
241-
"use a PeriodIndex instead")
236+
msg = ('must not have differing left [{ltype}] and right '
237+
'[{rtype}] types')
238+
raise ValueError(msg.format(ltype=type(left).__name__,
239+
rtype=type(right).__name__))
240+
elif isinstance(left, ABCPeriodIndex):
241+
msg = 'Period dtypes are not supported, use a PeriodIndex instead'
242+
raise ValueError(msg)
243+
elif isinstance(left, ABCDatetimeIndex) and left.tz != right.tz:
244+
msg = ("left and right must have the same time zone, got "
245+
"'{left_tz}' and '{right_tz}'")
246+
raise ValueError(msg.format(left_tz=left.tz, right_tz=right.tz))
242247

243248
result._left = left
244249
result._right = right
@@ -640,8 +645,18 @@ def mid(self):
640645
return Index(0.5 * (self.left.values + self.right.values))
641646
except TypeError:
642647
# datetime safe version
648+
tz = self.right.tz
649+
freq = self.right.freq
643650
delta = self.right.values - self.left.values
644-
return Index(self.left.values + 0.5 * delta)
651+
652+
# handle tz aware
653+
if tz:
654+
data = self.left.tz_localize(None) + 0.5 * delta
655+
data = data.tz_localize(tz)
656+
else:
657+
data = self.left + 0.5 * delta
658+
659+
return DatetimeIndex(data, freq=freq, tz=tz)
645660

646661
@cache_readonly
647662
def is_monotonic(self):

pandas/tests/indexes/test_interval.py

+91-44
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,37 @@ def create_index_with_nan(self, closed='right'):
4242
np.where(mask, np.arange(10), np.nan),
4343
np.where(mask, np.arange(1, 11), np.nan), closed=closed)
4444

45-
def test_constructors(self, closed, name):
46-
left, right = Index([0, 1, 2, 3]), Index([1, 2, 3, 4])
45+
@pytest.mark.parametrize('data', [
46+
Index([0, 1, 2, 3, 4]),
47+
Index(list('abcde')),
48+
date_range('2017-01-01', periods=5),
49+
date_range('2017-01-01', periods=5, tz='US/Eastern'),
50+
timedelta_range('1 day', periods=5)])
51+
def test_constructors(self, data, closed, name):
52+
left, right = data[:-1], data[1:]
4753
ivs = [Interval(l, r, closed=closed) for l, r in lzip(left, right)]
4854
expected = IntervalIndex._simple_new(
4955
left=left, right=right, closed=closed, name=name)
5056

57+
# validate expected
58+
assert expected.closed == closed
59+
assert expected.name == name
60+
assert expected.dtype.subtype == data.dtype
61+
tm.assert_index_equal(expected.left, data[:-1])
62+
tm.assert_index_equal(expected.right, data[1:])
63+
64+
# validated constructors
5165
result = IntervalIndex(ivs, name=name)
5266
tm.assert_index_equal(result, expected)
5367

5468
result = IntervalIndex.from_intervals(ivs, name=name)
5569
tm.assert_index_equal(result, expected)
5670

57-
result = IntervalIndex.from_breaks(
58-
np.arange(5), closed=closed, name=name)
71+
result = IntervalIndex.from_breaks(data, closed=closed, name=name)
5972
tm.assert_index_equal(result, expected)
6073

6174
result = IntervalIndex.from_arrays(
62-
left.values, right.values, closed=closed, name=name)
75+
left, right, closed=closed, name=name)
6376
tm.assert_index_equal(result, expected)
6477

6578
result = IntervalIndex.from_tuples(
@@ -186,6 +199,9 @@ def test_constructors_errors(self):
186199
IntervalIndex.from_intervals([Interval(0, 1),
187200
Interval(1, 2, closed='left')])
188201

202+
with tm.assert_raises_regex(ValueError, msg):
203+
IntervalIndex([Interval(0, 1), Interval(2, 3, closed='left')])
204+
189205
with tm.assert_raises_regex(ValueError, msg):
190206
Index([Interval(0, 1), Interval(2, 3, closed='left')])
191207

@@ -209,26 +225,23 @@ def test_constructors_errors(self):
209225
with tm.assert_raises_regex(ValueError, msg):
210226
IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))
211227

212-
def test_constructors_datetimelike(self, closed):
228+
@pytest.mark.parametrize('tz_left, tz_right', [
229+
(None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')])
230+
def test_constructors_errors_tz(self, tz_left, tz_right):
231+
left = date_range('2017-01-01', periods=4, tz=tz_left)
232+
right = date_range('2017-01-02', periods=4, tz=tz_right)
213233

214-
# DTI / TDI
215-
for idx in [pd.date_range('20130101', periods=5),
216-
pd.timedelta_range('1 day', periods=5)]:
217-
result = IntervalIndex.from_breaks(idx, closed=closed)
218-
expected = IntervalIndex.from_breaks(idx.values, closed=closed)
219-
tm.assert_index_equal(result, expected)
220-
221-
expected_scalar_type = type(idx[0])
222-
i = result[0]
223-
assert isinstance(i.left, expected_scalar_type)
224-
assert isinstance(i.right, expected_scalar_type)
234+
# don't need to check IntervalIndex(...) or from_intervals, since
235+
# mixed time zones are already disallowed at the Interval level
236+
with pytest.raises(ValueError):
237+
IntervalIndex.from_arrays(left, right)
225238

226-
def test_constructors_error(self):
239+
with pytest.raises(ValueError):
240+
IntervalIndex.from_tuples(lzip(left, right))
227241

228-
# non-intervals
229-
def f():
230-
IntervalIndex.from_intervals([0.997, 4.0])
231-
pytest.raises(TypeError, f)
242+
with pytest.raises(ValueError):
243+
breaks = left.tolist() + [right[-1]]
244+
IntervalIndex.from_breaks(breaks)
232245

233246
def test_properties(self, closed):
234247
index = self.create_index(closed=closed)
@@ -964,23 +977,46 @@ def test_sort_values(self, closed):
964977
expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
965978
tm.assert_index_equal(result, expected)
966979

967-
def test_datetime(self):
968-
dates = date_range('2000', periods=3)
969-
idx = IntervalIndex.from_breaks(dates)
970-
971-
tm.assert_index_equal(idx.left, dates[:2])
972-
tm.assert_index_equal(idx.right, dates[-2:])
973-
974-
expected = date_range('2000-01-01T12:00', periods=2)
975-
tm.assert_index_equal(idx.mid, expected)
976-
977-
assert Timestamp('2000-01-01T12') not in idx
978-
assert Timestamp('2000-01-01T12') not in idx
979-
980-
target = date_range('1999-12-31T12:00', periods=7, freq='12H')
981-
actual = idx.get_indexer(target)
980+
@pytest.mark.parametrize('tz', [None, 'US/Eastern'])
981+
def test_datetime(self, tz):
982+
start = Timestamp('2000-01-01', tz=tz)
983+
dates = date_range(start=start, periods=10)
984+
index = IntervalIndex.from_breaks(dates)
985+
986+
# test mid
987+
start = Timestamp('2000-01-01T12:00', tz=tz)
988+
expected = date_range(start=start, periods=9)
989+
tm.assert_index_equal(index.mid, expected)
990+
991+
# __contains__ doesn't check individual points
992+
assert Timestamp('2000-01-01', tz=tz) not in index
993+
assert Timestamp('2000-01-01T12', tz=tz) not in index
994+
assert Timestamp('2000-01-02', tz=tz) not in index
995+
iv_true = Interval(Timestamp('2000-01-01T08', tz=tz),
996+
Timestamp('2000-01-01T18', tz=tz))
997+
iv_false = Interval(Timestamp('1999-12-31', tz=tz),
998+
Timestamp('2000-01-01', tz=tz))
999+
assert iv_true in index
1000+
assert iv_false not in index
1001+
1002+
# .contains does check individual points
1003+
assert not index.contains(Timestamp('2000-01-01', tz=tz))
1004+
assert index.contains(Timestamp('2000-01-01T12', tz=tz))
1005+
assert index.contains(Timestamp('2000-01-02', tz=tz))
1006+
assert index.contains(iv_true)
1007+
assert not index.contains(iv_false)
1008+
1009+
# test get_indexer
1010+
start = Timestamp('1999-12-31T12:00', tz=tz)
1011+
target = date_range(start=start, periods=7, freq='12H')
1012+
actual = index.get_indexer(target)
1013+
expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype='intp')
1014+
tm.assert_numpy_array_equal(actual, expected)
9821015

983-
expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp')
1016+
start = Timestamp('2000-01-08T18:00', tz=tz)
1017+
target = date_range(start=start, periods=7, freq='6H')
1018+
actual = index.get_indexer(target)
1019+
expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype='intp')
9841020
tm.assert_numpy_array_equal(actual, expected)
9851021

9861022
def test_append(self, closed):
@@ -1079,9 +1115,11 @@ def test_construction_from_numeric(self, closed, name):
10791115
closed=closed)
10801116
tm.assert_index_equal(result, expected)
10811117

1082-
def test_construction_from_timestamp(self, closed, name):
1118+
@pytest.mark.parametrize('tz', [None, 'US/Eastern'])
1119+
def test_construction_from_timestamp(self, closed, name, tz):
10831120
# combinations of start/end/periods without freq
1084-
start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06')
1121+
start = Timestamp('2017-01-01', tz=tz)
1122+
end = Timestamp('2017-01-06', tz=tz)
10851123
breaks = date_range(start=start, end=end)
10861124
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
10871125

@@ -1099,7 +1137,8 @@ def test_construction_from_timestamp(self, closed, name):
10991137

11001138
# combinations of start/end/periods with fixed freq
11011139
freq = '2D'
1102-
start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07')
1140+
start = Timestamp('2017-01-01', tz=tz)
1141+
end = Timestamp('2017-01-07', tz=tz)
11031142
breaks = date_range(start=start, end=end, freq=freq)
11041143
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
11051144

@@ -1116,14 +1155,15 @@ def test_construction_from_timestamp(self, closed, name):
11161155
tm.assert_index_equal(result, expected)
11171156

11181157
# output truncates early if freq causes end to be skipped.
1119-
end = Timestamp('2017-01-08')
1158+
end = Timestamp('2017-01-08', tz=tz)
11201159
result = interval_range(start=start, end=end, freq=freq, name=name,
11211160
closed=closed)
11221161
tm.assert_index_equal(result, expected)
11231162

11241163
# combinations of start/end/periods with non-fixed freq
11251164
freq = 'M'
1126-
start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31')
1165+
start = Timestamp('2017-01-01', tz=tz)
1166+
end = Timestamp('2017-12-31', tz=tz)
11271167
breaks = date_range(start=start, end=end, freq=freq)
11281168
expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
11291169

@@ -1140,7 +1180,7 @@ def test_construction_from_timestamp(self, closed, name):
11401180
tm.assert_index_equal(result, expected)
11411181

11421182
# output truncates early if freq causes end to be skipped.
1143-
end = Timestamp('2018-01-15')
1183+
end = Timestamp('2018-01-15', tz=tz)
11441184
result = interval_range(start=start, end=end, freq=freq, name=name,
11451185
closed=closed)
11461186
tm.assert_index_equal(result, expected)
@@ -1308,6 +1348,13 @@ def test_errors(self):
13081348
with tm.assert_raises_regex(ValueError, msg):
13091349
interval_range(end=Timedelta('1 day'), periods=10, freq='foo')
13101350

1351+
# mixed tz
1352+
start = Timestamp('2017-01-01', tz='US/Eastern')
1353+
end = Timestamp('2017-01-07', tz='US/Pacific')
1354+
msg = 'Start and end cannot both be tz-aware with different timezones'
1355+
with tm.assert_raises_regex(TypeError, msg):
1356+
interval_range(start=start, end=end)
1357+
13111358

13121359
class TestIntervalTree(object):
13131360
def setup_method(self, method):

0 commit comments

Comments
 (0)