From 3af3aa40a546006c3999fccca5fe1b28355fb8c5 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Tue, 28 Nov 2017 21:11:29 -0700
Subject: [PATCH 1/3] BUG: Support tz-aware data in IntervalIndex

---
 doc/source/whatsnew/v0.22.0.txt       |   1 +
 pandas/_libs/interval.pyx             |   8 ++
 pandas/core/indexes/interval.py       |  41 +++++---
 pandas/tests/indexes/test_interval.py | 136 +++++++++++++++++---------
 pandas/tests/scalar/test_interval.py  |  22 ++++-
 5 files changed, 150 insertions(+), 58 deletions(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 4c716bf15d923..f55c6f696544e 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -132,6 +132,7 @@ Other Enhancements
 - :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`)
 - Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`)
 - :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`).
+- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`)
 
 
 .. _whatsnew_0220.api_breaking:
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
index 822df1ce2b968..480ea5cb4fa80 100644
--- a/pandas/_libs/interval.pyx
+++ b/pandas/_libs/interval.pyx
@@ -6,6 +6,7 @@ cimport cython
 import cython
 from numpy cimport ndarray
 from tslib import Timestamp
+from tslibs.timezones cimport get_timezone
 
 from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
                              PyObject_RichCompare)
@@ -119,6 +120,13 @@ cdef class Interval(IntervalMixin):
             raise ValueError(msg)
         if not left <= right:
             raise ValueError('left side of interval must be <= right side')
+        if (isinstance(left, Timestamp) and
+                get_timezone(left.tzinfo) != get_timezone(right.tzinfo)):
+            # GH 18538
+            msg = ("left and right must have the same time zone, got "
+                   "'{left_tz}' and '{right_tz}'")
+            raise ValueError(msg.format(left_tz=left.tzinfo,
+                                        right_tz=right.tzinfo))
         self.left = left
         self.right = right
         self.closed = closed
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 02ac74e619fa4..afa26cb876b40 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -3,13 +3,14 @@
 import numpy as np
 
 from pandas.core.dtypes.missing import notna, isna
-from pandas.core.dtypes.generic import ABCPeriodIndex
+from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex
 from pandas.core.dtypes.dtypes import IntervalDtype
 from pandas.core.dtypes.cast import maybe_convert_platform
 from pandas.core.dtypes.common import (
     _ensure_platform_int,
     is_list_like,
     is_datetime_or_timedelta_dtype,
+    is_datetime64tz_dtype,
     is_integer_dtype,
     is_object_dtype,
     is_categorical_dtype,
@@ -28,7 +29,7 @@
     Interval, IntervalMixin, IntervalTree,
     intervals_to_interval_bounds)
 
-from pandas.core.indexes.datetimes import date_range
+from pandas.core.indexes.datetimes import DatetimeIndex, date_range
 from pandas.core.indexes.timedeltas import timedelta_range
 from pandas.core.indexes.multi import MultiIndex
 from pandas.compat.numpy import function as nv
@@ -54,7 +55,7 @@ def _get_next_label(label):
     dtype = getattr(label, 'dtype', type(label))
     if isinstance(label, (Timestamp, Timedelta)):
         dtype = 'datetime64'
-    if is_datetime_or_timedelta_dtype(dtype):
+    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
         return label + np.timedelta64(1, 'ns')
     elif is_integer_dtype(dtype):
         return label + 1
@@ -69,7 +70,7 @@ def _get_prev_label(label):
     dtype = getattr(label, 'dtype', type(label))
     if isinstance(label, (Timestamp, Timedelta)):
         dtype = 'datetime64'
-    if is_datetime_or_timedelta_dtype(dtype):
+    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
         return label - np.timedelta64(1, 'ns')
     elif is_integer_dtype(dtype):
         return label - 1
@@ -227,17 +228,21 @@ def _simple_new(cls, left, right, closed=None, name=None,
         # coerce dtypes to match if needed
         if is_float_dtype(left) and is_integer_dtype(right):
             right = right.astype(left.dtype)
-        if is_float_dtype(right) and is_integer_dtype(left):
+        elif is_float_dtype(right) and is_integer_dtype(left):
             left = left.astype(right.dtype)
 
         if type(left) != type(right):
-            raise ValueError("must not have differing left [{left}] "
-                             "and right [{right}] types"
-                             .format(left=type(left), right=type(right)))
-
-        if isinstance(left, ABCPeriodIndex):
-            raise ValueError("Period dtypes are not supported, "
-                             "use a PeriodIndex instead")
+            msg = ('must not have differing left [{ltype}] and right '
+                   '[{rtype}] types')
+            raise ValueError(msg.format(ltype=type(left).__name__,
+                                        rtype=type(right).__name__))
+        elif isinstance(left, ABCPeriodIndex):
+            msg = 'Period dtypes are not supported, use a PeriodIndex instead'
+            raise ValueError(msg)
+        elif isinstance(left, ABCDatetimeIndex) and left.tz != right.tz:
+            msg = ("left and right must have the same time zone, got "
+                   "'{left_tz}' and '{right_tz}'")
+            raise ValueError(msg.format(left_tz=left.tz, right_tz=right.tz))
 
         result._left = left
         result._right = right
@@ -657,8 +662,18 @@ def mid(self):
             return Index(0.5 * (self.left.values + self.right.values))
         except TypeError:
             # datetime safe version
+            tz = self.right.tz
+            freq = self.right.freq
             delta = self.right.values - self.left.values
-            return Index(self.left.values + 0.5 * delta)
+
+            # handle tz aware
+            if tz:
+                data = self.left.tz_localize(None) + 0.5 * delta
+                data = data.tz_localize(tz)
+            else:
+                data = self.left + 0.5 * delta
+
+            return DatetimeIndex(data, freq=freq, tz=tz)
 
     @cache_readonly
     def is_monotonic(self):
diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py
index dc06e51c6d8e7..1850ff2795a24 100644
--- a/pandas/tests/indexes/test_interval.py
+++ b/pandas/tests/indexes/test_interval.py
@@ -42,24 +42,37 @@ def create_index_with_nan(self, closed='right'):
             np.where(mask, np.arange(10), np.nan),
             np.where(mask, np.arange(1, 11), np.nan), closed=closed)
 
-    def test_constructors(self, closed, name):
-        left, right = Index([0, 1, 2, 3]), Index([1, 2, 3, 4])
+    @pytest.mark.parametrize('data', [
+        Index([0, 1, 2, 3, 4]),
+        Index(list('abcde')),
+        date_range('2017-01-01', periods=5),
+        date_range('2017-01-01', periods=5, tz='US/Eastern'),
+        timedelta_range('1 day', periods=5)])
+    def test_constructors(self, data, closed, name):
+        left, right = data[:-1], data[1:]
         ivs = [Interval(l, r, closed=closed) for l, r in lzip(left, right)]
         expected = IntervalIndex._simple_new(
             left=left, right=right, closed=closed, name=name)
 
+        # validate expected
+        assert expected.closed == closed
+        assert expected.name == name
+        assert expected.dtype.subtype == data.dtype
+        tm.assert_index_equal(expected.left, data[:-1])
+        tm.assert_index_equal(expected.right, data[1:])
+
+        # every public constructor should reproduce expected
         result = IntervalIndex(ivs, name=name)
         tm.assert_index_equal(result, expected)
 
         result = IntervalIndex.from_intervals(ivs, name=name)
         tm.assert_index_equal(result, expected)
 
-        result = IntervalIndex.from_breaks(
-            np.arange(5), closed=closed, name=name)
+        result = IntervalIndex.from_breaks(data, closed=closed, name=name)
         tm.assert_index_equal(result, expected)
 
         result = IntervalIndex.from_arrays(
-            left.values, right.values, closed=closed, name=name)
+            left, right, closed=closed, name=name)
         tm.assert_index_equal(result, expected)
 
         result = IntervalIndex.from_tuples(
@@ -186,6 +199,9 @@ def test_constructors_errors(self):
             IntervalIndex.from_intervals([Interval(0, 1),
                                           Interval(1, 2, closed='left')])
 
+        with tm.assert_raises_regex(ValueError, msg):
+            IntervalIndex([Interval(0, 1), Interval(2, 3, closed='left')])
+
         with tm.assert_raises_regex(ValueError, msg):
             Index([Interval(0, 1), Interval(2, 3, closed='left')])
 
@@ -209,26 +225,24 @@ def test_constructors_errors(self):
         with tm.assert_raises_regex(ValueError, msg):
             IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))
 
-    def test_constructors_datetimelike(self, closed):
+    @pytest.mark.parametrize('tz_left, tz_right', [
+        (None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')])
+    def test_constructors_errors_tz(self, tz_left, tz_right):
+        # GH 18537
+        left = date_range('2017-01-01', periods=4, tz=tz_left)
+        right = date_range('2017-01-02', periods=4, tz=tz_right)
 
-        # DTI / TDI
-        for idx in [pd.date_range('20130101', periods=5),
-                    pd.timedelta_range('1 day', periods=5)]:
-            result = IntervalIndex.from_breaks(idx, closed=closed)
-            expected = IntervalIndex.from_breaks(idx.values, closed=closed)
-            tm.assert_index_equal(result, expected)
-
-            expected_scalar_type = type(idx[0])
-            i = result[0]
-            assert isinstance(i.left, expected_scalar_type)
-            assert isinstance(i.right, expected_scalar_type)
+        # no need to check IntervalIndex(...) or from_intervals: mixed time
+        # zones are already rejected when each Interval is constructed
+        with pytest.raises(ValueError):
+            IntervalIndex.from_arrays(left, right)
 
-    def test_constructors_error(self):
+        with pytest.raises(ValueError):
+            IntervalIndex.from_tuples(lzip(left, right))
 
-        # non-intervals
-        def f():
-            IntervalIndex.from_intervals([0.997, 4.0])
-        pytest.raises(TypeError, f)
+        with pytest.raises(ValueError):
+            breaks = left.tolist() + [right[-1]]
+            IntervalIndex.from_breaks(breaks)
 
     def test_properties(self, closed):
         index = self.create_index(closed=closed)
@@ -964,23 +978,46 @@ def test_sort_values(self, closed):
         expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
         tm.assert_index_equal(result, expected)
 
-    def test_datetime(self):
-        dates = date_range('2000', periods=3)
-        idx = IntervalIndex.from_breaks(dates)
-
-        tm.assert_index_equal(idx.left, dates[:2])
-        tm.assert_index_equal(idx.right, dates[-2:])
-
-        expected = date_range('2000-01-01T12:00', periods=2)
-        tm.assert_index_equal(idx.mid, expected)
-
-        assert Timestamp('2000-01-01T12') not in idx
-        assert Timestamp('2000-01-01T12') not in idx
-
-        target = date_range('1999-12-31T12:00', periods=7, freq='12H')
-        actual = idx.get_indexer(target)
+    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
+    def test_datetime(self, tz):
+        start = Timestamp('2000-01-01', tz=tz)
+        dates = date_range(start=start, periods=10)
+        index = IntervalIndex.from_breaks(dates)
+
+        # test mid
+        start = Timestamp('2000-01-01T12:00', tz=tz)
+        expected = date_range(start=start, periods=9)
+        tm.assert_index_equal(index.mid, expected)
+
+        # __contains__ doesn't check individual points
+        assert Timestamp('2000-01-01', tz=tz) not in index
+        assert Timestamp('2000-01-01T12', tz=tz) not in index
+        assert Timestamp('2000-01-02', tz=tz) not in index
+        iv_true = Interval(Timestamp('2000-01-01T08', tz=tz),
+                           Timestamp('2000-01-01T18', tz=tz))
+        iv_false = Interval(Timestamp('1999-12-31', tz=tz),
+                            Timestamp('2000-01-01', tz=tz))
+        assert iv_true in index
+        assert iv_false not in index
+
+        # .contains does check individual points
+        assert not index.contains(Timestamp('2000-01-01', tz=tz))
+        assert index.contains(Timestamp('2000-01-01T12', tz=tz))
+        assert index.contains(Timestamp('2000-01-02', tz=tz))
+        assert index.contains(iv_true)
+        assert not index.contains(iv_false)
+
+        # test get_indexer
+        start = Timestamp('1999-12-31T12:00', tz=tz)
+        target = date_range(start=start, periods=7, freq='12H')
+        actual = index.get_indexer(target)
+        expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype='intp')
+        tm.assert_numpy_array_equal(actual, expected)
 
-        expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp')
+        start = Timestamp('2000-01-08T18:00', tz=tz)
+        target = date_range(start=start, periods=7, freq='6H')
+        actual = index.get_indexer(target)
+        expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype='intp')
         tm.assert_numpy_array_equal(actual, expected)
 
     def test_append(self, closed):
@@ -1079,9 +1116,11 @@ def test_construction_from_numeric(self, closed, name):
                                 closed=closed)
         tm.assert_index_equal(result, expected)
 
-    def test_construction_from_timestamp(self, closed, name):
+    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
+    def test_construction_from_timestamp(self, closed, name, tz):
         # combinations of start/end/periods without freq
-        start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06')
+        start = Timestamp('2017-01-01', tz=tz)
+        end = Timestamp('2017-01-06', tz=tz)
         breaks = date_range(start=start, end=end)
         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
 
@@ -1099,7 +1138,8 @@ def test_construction_from_timestamp(self, closed, name):
 
         # combinations of start/end/periods with fixed freq
         freq = '2D'
-        start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07')
+        start = Timestamp('2017-01-01', tz=tz)
+        end = Timestamp('2017-01-07', tz=tz)
         breaks = date_range(start=start, end=end, freq=freq)
         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
 
@@ -1116,14 +1156,15 @@ def test_construction_from_timestamp(self, closed, name):
         tm.assert_index_equal(result, expected)
 
         # output truncates early if freq causes end to be skipped.
-        end = Timestamp('2017-01-08')
+        end = Timestamp('2017-01-08', tz=tz)
         result = interval_range(start=start, end=end, freq=freq, name=name,
                                 closed=closed)
         tm.assert_index_equal(result, expected)
 
         # combinations of start/end/periods with non-fixed freq
         freq = 'M'
-        start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31')
+        start = Timestamp('2017-01-01', tz=tz)
+        end = Timestamp('2017-12-31', tz=tz)
         breaks = date_range(start=start, end=end, freq=freq)
         expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
 
@@ -1140,7 +1181,7 @@ def test_construction_from_timestamp(self, closed, name):
         tm.assert_index_equal(result, expected)
 
         # output truncates early if freq causes end to be skipped.
-        end = Timestamp('2018-01-15')
+        end = Timestamp('2018-01-15', tz=tz)
         result = interval_range(start=start, end=end, freq=freq, name=name,
                                 closed=closed)
         tm.assert_index_equal(result, expected)
@@ -1308,6 +1349,13 @@ def test_errors(self):
         with tm.assert_raises_regex(ValueError, msg):
             interval_range(end=Timedelta('1 day'), periods=10, freq='foo')
 
+        # mixed tz
+        start = Timestamp('2017-01-01', tz='US/Eastern')
+        end = Timestamp('2017-01-07', tz='US/Pacific')
+        msg = 'Start and end cannot both be tz-aware with different timezones'
+        with tm.assert_raises_regex(TypeError, msg):
+            interval_range(start=start, end=end)
+
 
 class TestIntervalTree(object):
     def setup_method(self, method):
diff --git a/pandas/tests/scalar/test_interval.py b/pandas/tests/scalar/test_interval.py
index d431db0b4ca4f..533a79656f174 100644
--- a/pandas/tests/scalar/test_interval.py
+++ b/pandas/tests/scalar/test_interval.py
@@ -1,6 +1,7 @@
 from __future__ import division
 
-from pandas import Interval
+from pandas import Interval, Timestamp
+from pandas.core.common import _any_none
 
 import pytest
 import pandas.util.testing as tm
@@ -137,3 +138,22 @@ def test_math_div(self, interval):
 
         with tm.assert_raises_regex(TypeError, msg):
             interval / 'foo'
+
+    def test_constructor_errors(self):
+        msg = "invalid option for 'closed': foo"
+        with tm.assert_raises_regex(ValueError, msg):
+            Interval(0, 1, closed='foo')
+
+        msg = 'left side of interval must be <= right side'
+        with tm.assert_raises_regex(ValueError, msg):
+            Interval(1, 0)
+
+    @pytest.mark.parametrize('tz_left, tz_right', [
+        (None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')])
+    def test_constructor_errors_tz(self, tz_left, tz_right):
+        # GH 18538
+        left = Timestamp('2017-01-01', tz=tz_left)
+        right = Timestamp('2017-01-02', tz=tz_right)
+        error = TypeError if _any_none(tz_left, tz_right) else ValueError
+        with pytest.raises(error):
+            Interval(left, right)

From 070873dbe5ee38a8f233e6505cb01e0545214baf Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Thu, 30 Nov 2017 21:55:07 -0700
Subject: [PATCH 2/3] initial fixes

---
 pandas/core/indexes/interval.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index afa26cb876b40..0b10b25ea38d9 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -29,7 +29,7 @@
     Interval, IntervalMixin, IntervalTree,
     intervals_to_interval_bounds)
 
-from pandas.core.indexes.datetimes import DatetimeIndex, date_range
+from pandas.core.indexes.datetimes import date_range
 from pandas.core.indexes.timedeltas import timedelta_range
 from pandas.core.indexes.multi import MultiIndex
 from pandas.compat.numpy import function as nv
@@ -239,7 +239,8 @@ def _simple_new(cls, left, right, closed=None, name=None,
         elif isinstance(left, ABCPeriodIndex):
             msg = 'Period dtypes are not supported, use a PeriodIndex instead'
             raise ValueError(msg)
-        elif isinstance(left, ABCDatetimeIndex) and left.tz != right.tz:
+        elif (isinstance(left, ABCDatetimeIndex) and
+                str(left.tz) != str(right.tz)):
             msg = ("left and right must have the same time zone, got "
                    "'{left_tz}' and '{right_tz}'")
             raise ValueError(msg.format(left_tz=left.tz, right_tz=right.tz))
@@ -663,7 +664,6 @@ def mid(self):
         except TypeError:
             # datetime safe version
             tz = self.right.tz
-            freq = self.right.freq
             delta = self.right.values - self.left.values
 
             # handle tz aware
@@ -673,7 +673,7 @@ def mid(self):
             else:
                 data = self.left + 0.5 * delta
 
-            return DatetimeIndex(data, freq=freq, tz=tz)
+            return data
 
     @cache_readonly
     def is_monotonic(self):

From 79e255c3c2a4ad27fdc41f84d8fda0c71a963fc2 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Thu, 7 Dec 2017 00:29:54 -0700
Subject: [PATCH 3/3] fix mid computation after bug fix

---
 pandas/core/indexes/interval.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 0b10b25ea38d9..a32e79920db41 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -663,17 +663,8 @@ def mid(self):
             return Index(0.5 * (self.left.values + self.right.values))
         except TypeError:
             # datetime safe version
-            tz = self.right.tz
-            delta = self.right.values - self.left.values
-
-            # handle tz aware
-            if tz:
-                data = self.left.tz_localize(None) + 0.5 * delta
-                data = data.tz_localize(tz)
-            else:
-                data = self.left + 0.5 * delta
-
-            return data
+            delta = self.right - self.left
+            return self.left + 0.5 * delta
 
     @cache_readonly
     def is_monotonic(self):