diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4d806f1f05a16..a62a737fbba31 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -211,6 +211,7 @@ Other API Changes - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) +- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) .. _whatsnew_0230.deprecations: @@ -279,11 +280,11 @@ Performance Improvements Documentation Changes ~~~~~~~~~~~~~~~~~~~~~ -- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) +- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) - Consistency when introducing code samples, using either colon or period. Rewrote some sentences for greater clarity, added more dynamic references to functions, methods and classes. - (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) + (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) - .. 
_whatsnew_0230.bug_fixes: @@ -310,7 +311,7 @@ Conversion - Bug in :class:`DatetimeIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) - Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) - Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) -- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) +- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) Indexing diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d1637873eb6e1..08773354d44d8 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -641,6 +641,8 @@ def __new__(cls, subtype=None): ---------- subtype : the dtype of the Interval """ + from pandas.core.dtypes.common import ( + is_categorical_dtype, is_string_dtype, pandas_dtype) if isinstance(subtype, IntervalDtype): return subtype @@ -659,7 +661,6 @@ def __new__(cls, subtype=None): if m is not None: subtype = m.group('subtype') - from pandas.core.dtypes.common import pandas_dtype try: subtype = pandas_dtype(subtype) except TypeError: @@ -670,6 +671,12 @@ def __new__(cls, subtype=None): u.subtype = None return u + if is_categorical_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ('category, object, and string subtypes are not supported ' + 'for IntervalDtype') + raise TypeError(msg) + try: return cls._cache[str(subtype)] except KeyError: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index def9b151f5c91..fd1980f9ab429 100644 --- a/pandas/core/indexes/interval.py +++ 
b/pandas/core/indexes/interval.py @@ -11,6 +11,8 @@ is_list_like, is_datetime_or_timedelta_dtype, is_datetime64tz_dtype, + is_categorical_dtype, + is_string_dtype, is_integer_dtype, is_float_dtype, is_interval_dtype, @@ -92,6 +94,30 @@ def _get_interval_closed_bounds(interval): return left, right +def maybe_convert_platform_interval(values): + """ + Try to do platform conversion, with special casing for IntervalIndex. + Wrapper around maybe_convert_platform that alters the default return + dtype in certain cases to be compatible with IntervalIndex. For example, + empty lists return with integer dtype instead of object dtype, which is + prohibited for IntervalIndex. + + Parameters + ---------- + values : array-like + + Returns + ------- + array + """ + if isinstance(values, (list, tuple)) and len(values) == 0: + # GH 19016 + # empty lists/tuples get object dtype by default, but this is + # prohibited for IntervalIndex, so coerce to integer instead + return np.array([], dtype=np.intp) + return maybe_convert_platform(values) + + def _new_IntervalIndex(cls, d): """ This is called upon unpickling, rather than the default which doesn't have @@ -206,7 +232,7 @@ def __new__(cls, data, closed=None, if is_scalar(data): cls._scalar_data_error(data) - data = maybe_convert_platform(data) + data = maybe_convert_platform_interval(data) left, right, infer_closed = intervals_to_interval_bounds(data) if _all_not_none(closed, infer_closed) and closed != infer_closed: @@ -242,6 +268,11 @@ def _simple_new(cls, left, right, closed=None, name=None, '[{rtype}] types') raise ValueError(msg.format(ltype=type(left).__name__, rtype=type(right).__name__)) + elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): + # GH 19016 + msg = ('category, object, and string subtypes are not supported ' + 'for IntervalIndex') + raise TypeError(msg) elif isinstance(left, ABCPeriodIndex): msg = 'Period dtypes are not supported, use a PeriodIndex instead' raise ValueError(msg) @@ -403,7 
+434,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - breaks = maybe_convert_platform(breaks) + breaks = maybe_convert_platform_interval(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, name=name, copy=copy) @@ -444,8 +475,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - left = maybe_convert_platform(left) - right = maybe_convert_platform(right) + left = maybe_convert_platform_interval(left) + right = maybe_convert_platform_interval(right) return cls._simple_new(left, right, closed, name=name, copy=copy, verify_integrity=True) @@ -493,7 +524,7 @@ def from_intervals(cls, data, name=None, copy=False): left, right, closed = data.left, data.right, data.closed name = name or data.name else: - data = maybe_convert_platform(data) + data = maybe_convert_platform_interval(data) left, right, closed = intervals_to_interval_bounds(data) return cls.from_arrays(left, right, closed, name=name, copy=False) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d8e16482a414e..6a3715fd66159 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -152,7 +152,7 @@ def test_update_dtype(self, dtype, new_dtype): assert result.ordered is expected_ordered @pytest.mark.parametrize('bad_dtype', [ - 'foo', object, np.int64, PeriodDtype('Q'), IntervalDtype(object)]) + 'foo', object, np.int64, PeriodDtype('Q')]) def test_update_dtype_errors(self, bad_dtype): dtype = CategoricalDtype(list('abc'), False) msg = 'a CategoricalDtype must be passed to perform an update, ' @@ -460,6 +460,17 @@ def test_construction(self): assert i.subtype == np.dtype('int64') assert is_interval_dtype(i) + @pytest.mark.parametrize('subtype', [ + CategoricalDtype(list('abc'), False), + 
CategoricalDtype(list('wxyz'), True), + object, str, '