From e06e3748fb6fac554aa8484b6a000f0b75102219 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Sun, 31 Dec 2017 17:02:24 -0700
Subject: [PATCH 1/2] API: Prohibit non-numeric dtypes in IntervalIndex

---
 doc/source/whatsnew/v0.23.0.txt               |  1 +
 pandas/core/dtypes/dtypes.py                  |  9 ++-
 pandas/core/indexes/interval.py               | 29 ++++++--
 pandas/tests/dtypes/test_dtypes.py            | 13 +++-
 .../tests/indexes/interval/test_interval.py   | 74 ++++++++++++-------
 5 files changed, 91 insertions(+), 35 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index c7b1cb4379700..88d8585d8dc42 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -211,6 +211,7 @@ Other API Changes
 - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`)
 - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`)
 - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`)
+- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes; passing one now raises a ``TypeError`` (:issue:`19016`)
 
 .. _whatsnew_0230.deprecations:
 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index d1637873eb6e1..08773354d44d8 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -641,6 +641,8 @@ def __new__(cls, subtype=None):
         ----------
         subtype : the dtype of the Interval
         """
+        from pandas.core.dtypes.common import (
+            is_categorical_dtype, is_string_dtype, pandas_dtype)
 
         if isinstance(subtype, IntervalDtype):
             return subtype
@@ -659,7 +661,6 @@ def __new__(cls, subtype=None):
                 if m is not None:
                     subtype = m.group('subtype')
 
-            from pandas.core.dtypes.common import pandas_dtype
             try:
                 subtype = pandas_dtype(subtype)
             except TypeError:
@@ -670,6 +671,12 @@ def __new__(cls, subtype=None):
             u.subtype = None
             return u
 
+        if is_categorical_dtype(subtype) or is_string_dtype(subtype):
+            # GH 19016
+            msg = ('category, object, and string subtypes are not supported '
+                   'for IntervalDtype')
+            raise TypeError(msg)
+
         try:
             return cls._cache[str(subtype)]
         except KeyError:
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index def9b151f5c91..8550c3e9bf8f1 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -11,6 +11,8 @@
     is_list_like,
     is_datetime_or_timedelta_dtype,
     is_datetime64tz_dtype,
+    is_categorical_dtype,
+    is_string_dtype,
     is_integer_dtype,
     is_float_dtype,
     is_interval_dtype,
@@ -92,6 +94,18 @@ def _get_interval_closed_bounds(interval):
     return left, right
 
 
+def _maybe_convert_platform_interval(data):
+    """
+    Try to do platform conversion, with special casing for IntervalIndex
+    """
+    if isinstance(data, (list, tuple)) and len(data) == 0:
+        # GH 19016
+        # empty lists/tuples get object dtype by default, but this is not
+        # prohibited for IntervalIndex, so coerce to integer instead
+        return np.array([], dtype=np.intp)
+    return maybe_convert_platform(data)
+
+
 def _new_IntervalIndex(cls, d):
     """
     This is called upon unpickling, rather than the default which doesn't have
@@ -206,7 +220,7 @@ def __new__(cls, data, closed=None,
             if is_scalar(data):
                 cls._scalar_data_error(data)
 
-            data = maybe_convert_platform(data)
+            data = _maybe_convert_platform_interval(data)
             left, right, infer_closed = intervals_to_interval_bounds(data)
 
             if _all_not_none(closed, infer_closed) and closed != infer_closed:
@@ -242,6 +256,11 @@ def _simple_new(cls, left, right, closed=None, name=None,
                    '[{rtype}] types')
             raise ValueError(msg.format(ltype=type(left).__name__,
                                         rtype=type(right).__name__))
+        elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
+            # GH 19016
+            msg = ('category, object, and string subtypes are not supported '
+                   'for IntervalIndex')
+            raise TypeError(msg)
         elif isinstance(left, ABCPeriodIndex):
             msg = 'Period dtypes are not supported, use a PeriodIndex instead'
             raise ValueError(msg)
@@ -403,7 +422,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False):
         IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                     list/array of tuples
         """
-        breaks = maybe_convert_platform(breaks)
+        breaks = _maybe_convert_platform_interval(breaks)
 
         return cls.from_arrays(breaks[:-1], breaks[1:], closed,
                                name=name, copy=copy)
@@ -444,8 +463,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False):
         IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                     list/array of tuples
         """
-        left = maybe_convert_platform(left)
-        right = maybe_convert_platform(right)
+        left = _maybe_convert_platform_interval(left)
+        right = _maybe_convert_platform_interval(right)
 
         return cls._simple_new(left, right, closed, name=name,
                                copy=copy, verify_integrity=True)
@@ -493,7 +512,7 @@ def from_intervals(cls, data, name=None, copy=False):
             left, right, closed = data.left, data.right, data.closed
             name = name or data.name
         else:
-            data = maybe_convert_platform(data)
+            data = _maybe_convert_platform_interval(data)
             left, right, closed = intervals_to_interval_bounds(data)
         return cls.from_arrays(left, right, closed, name=name, copy=False)
 
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index d8e16482a414e..6a3715fd66159 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -152,7 +152,7 @@ def test_update_dtype(self, dtype, new_dtype):
         assert result.ordered is expected_ordered
 
     @pytest.mark.parametrize('bad_dtype', [
-        'foo', object, np.int64, PeriodDtype('Q'), IntervalDtype(object)])
+        'foo', object, np.int64, PeriodDtype('Q')])
     def test_update_dtype_errors(self, bad_dtype):
         dtype = CategoricalDtype(list('abc'), False)
         msg = 'a CategoricalDtype must be passed to perform an update, '
@@ -460,6 +460,17 @@ def test_construction(self):
             assert i.subtype == np.dtype('int64')
             assert is_interval_dtype(i)
 
+    @pytest.mark.parametrize('subtype', [
+        CategoricalDtype(list('abc'), False),
+        CategoricalDtype(list('wxyz'), True),
+        object, str, '<U10', 'interval[category]', 'interval[object]'])
+    def test_construction_not_supported(self, subtype):
+        # GH 19016
+        msg = ('category, object, and string subtypes are not supported '
+               'for IntervalDtype')
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalDtype(subtype)
+
     def test_construction_generic(self):
         # generic
         i = IntervalDtype('interval')
diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
index e2f48f40e9b7a..dd673294b128f 100644
--- a/pandas/tests/indexes/interval/test_interval.py
+++ b/pandas/tests/indexes/interval/test_interval.py
@@ -4,7 +4,7 @@
 import numpy as np
 from pandas import (
     Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp,
-    Timedelta, date_range, timedelta_range)
+    Timedelta, date_range, timedelta_range, Categorical)
 from pandas.compat import lzip
 from pandas.core.common import _asarray_tuplesafe
 from pandas.tests.indexes.common import Base
@@ -42,7 +42,6 @@ def create_index_with_nan(self, closed='right'):
 
     @pytest.mark.parametrize('data', [
         Index([0, 1, 2, 3, 4]),
-        Index(list('abcde')),
         date_range('2017-01-01', periods=5),
         date_range('2017-01-01', periods=5, tz='US/Eastern'),
         timedelta_range('1 day', periods=5)])
@@ -138,10 +137,10 @@ def test_constructors_nan(self, closed, data):
         [],
         np.array([], dtype='int64'),
         np.array([], dtype='float64'),
-        np.array([], dtype=object)])
+        np.array([], dtype='datetime64[ns]')])
     def test_constructors_empty(self, data, closed):
         # GH 18421
-        expected_dtype = data.dtype if isinstance(data, np.ndarray) else object
+        expected_dtype = getattr(data, 'dtype', np.intp)
         expected_values = np.array([], dtype=object)
         expected_index = IntervalIndex(data, closed=closed)
 
@@ -223,6 +222,48 @@ def test_constructors_errors(self):
         with tm.assert_raises_regex(ValueError, msg):
             IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))
 
+        # GH 19016: categorical data
+        data = Categorical(list('01234abcde'), ordered=True)
+        msg = ('category, object, and string subtypes are not supported '
+               'for IntervalIndex')
+
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalIndex.from_breaks(data)
+
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalIndex.from_arrays(data[:-1], data[1:])
+
+    @pytest.mark.parametrize('data', [
+        tuple('0123456789'),
+        list('abcdefghij'),
+        np.array(list('abcdefghij'), dtype=object),
+        np.array(list('abcdefghij'), dtype='<U1')])
+    def test_constructors_errors_string(self, data):
+        # GH 19016
+        left, right = data[:-1], data[1:]
+        tuples = lzip(left, right)
+        ivs = [Interval(l, r) for l, r in tuples] or data
+        msg = ('category, object, and string subtypes are not supported '
+               'for IntervalIndex')
+
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalIndex(ivs)
+
+        with tm.assert_raises_regex(TypeError, msg):
+            Index(ivs)
+
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalIndex.from_intervals(ivs)
+
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalIndex.from_breaks(data)
+
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalIndex.from_arrays(left, right)
+
+        with tm.assert_raises_regex(TypeError, msg):
+            IntervalIndex.from_tuples(tuples)
+
     @pytest.mark.parametrize('tz_left, tz_right', [
         (None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')])
     def test_constructors_errors_tz(self, tz_left, tz_right):
@@ -298,18 +339,6 @@ def test_length(self, closed, breaks):
         expected = Index(iv.length if notna(iv) else iv for iv in index)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize('breaks', [
-        list('abcdefgh'),
-        lzip(range(10), range(1, 11)),
-        [['A', 'B'], ['a', 'b'], ['c', 'd'], ['e', 'f']],
-        [Interval(0, 1), Interval(1, 2), Interval(3, 4), Interval(4, 5)]])
-    def test_length_errors(self, closed, breaks):
-        # GH 18789
-        index = IntervalIndex.from_breaks(breaks)
-        msg = 'IntervalIndex contains Intervals without defined length'
-        with tm.assert_raises_regex(TypeError, msg):
-            index.length
-
     def test_with_nans(self, closed):
         index = self.create_index(closed=closed)
         assert not index.hasnans
@@ -428,9 +457,7 @@ def test_delete(self, closed):
         interval_range(0, periods=10, closed='neither'),
         interval_range(1.7, periods=8, freq=2.5, closed='both'),
         interval_range(Timestamp('20170101'), periods=12, closed='left'),
-        interval_range(Timedelta('1 day'), periods=6, closed='right'),
-        IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
-        IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
+        interval_range(Timedelta('1 day'), periods=6, closed='right')])
     def test_insert(self, data):
         item = data[0]
         idx_item = IntervalIndex([item])
@@ -504,15 +531,6 @@ def test_unique(self, closed):
             [(0, 1), (0, 1), (2, 3)], closed=closed)
         assert not idx.is_unique
 
-        # unique mixed
-        idx = IntervalIndex.from_tuples([(0, 1), ('a', 'b')], closed=closed)
-        assert idx.is_unique
-
-        # duplicate mixed
-        idx = IntervalIndex.from_tuples(
-            [(0, 1), ('a', 'b'), (0, 1)], closed=closed)
-        assert not idx.is_unique
-
         # empty
         idx = IntervalIndex([], closed=closed)
         assert idx.is_unique

From ca6ebe8fdefae4be632d062110476c9b73f6c236 Mon Sep 17 00:00:00 2001
From: jschendel <jschendel@users.noreply.github.com>
Date: Wed, 3 Jan 2018 00:49:17 -0700
Subject: [PATCH 2/2] CLN: Make maybe_convert_platform_interval public and expand docstring

---
 pandas/core/indexes/interval.py | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 8550c3e9bf8f1..fd1980f9ab429 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -94,16 +94,28 @@ def _get_interval_closed_bounds(interval):
     return left, right
 
 
-def _maybe_convert_platform_interval(data):
+def maybe_convert_platform_interval(values):
     """
-    Try to do platform conversion, with special casing for IntervalIndex
+    Try to do platform conversion, with special casing for IntervalIndex.
+
+    Wraps maybe_convert_platform, altering the default return dtype where
+    needed for IntervalIndex compatibility: e.g. empty lists/tuples return
+    integer dtype instead of object dtype, which IntervalIndex prohibits.
+
+    Parameters
+    ----------
+    values : array-like
+
+    Returns
+    -------
+    array
     """
-    if isinstance(data, (list, tuple)) and len(data) == 0:
+    if isinstance(values, (list, tuple)) and len(values) == 0:
         # GH 19016
-        # empty lists/tuples get object dtype by default, but this is not
+        # empty lists/tuples get object dtype by default, but this is
         # prohibited for IntervalIndex, so coerce to integer instead
         return np.array([], dtype=np.intp)
-    return maybe_convert_platform(data)
+    return maybe_convert_platform(values)
 
 
 def _new_IntervalIndex(cls, d):
@@ -220,7 +232,7 @@ def __new__(cls, data, closed=None,
             if is_scalar(data):
                 cls._scalar_data_error(data)
 
-            data = _maybe_convert_platform_interval(data)
+            data = maybe_convert_platform_interval(data)
             left, right, infer_closed = intervals_to_interval_bounds(data)
 
             if _all_not_none(closed, infer_closed) and closed != infer_closed:
@@ -422,7 +434,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False):
         IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                     list/array of tuples
         """
-        breaks = _maybe_convert_platform_interval(breaks)
+        breaks = maybe_convert_platform_interval(breaks)
 
         return cls.from_arrays(breaks[:-1], breaks[1:], closed,
                                name=name, copy=copy)
@@ -463,8 +475,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False):
         IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                     list/array of tuples
         """
-        left = _maybe_convert_platform_interval(left)
-        right = _maybe_convert_platform_interval(right)
+        left = maybe_convert_platform_interval(left)
+        right = maybe_convert_platform_interval(right)
 
         return cls._simple_new(left, right, closed, name=name,
                                copy=copy, verify_integrity=True)
@@ -512,7 +524,7 @@ def from_intervals(cls, data, name=None, copy=False):
             left, right, closed = data.left, data.right, data.closed
             name = name or data.name
         else:
-            data = _maybe_convert_platform_interval(data)
+            data = maybe_convert_platform_interval(data)
             left, right, closed = intervals_to_interval_bounds(data)
         return cls.from_arrays(left, right, closed, name=name, copy=False)