From e06e3748fb6fac554aa8484b6a000f0b75102219 Mon Sep 17 00:00:00 2001 From: jschendel Date: Sun, 31 Dec 2017 17:02:24 -0700 Subject: [PATCH 1/2] API: Prohibit non-numeric dtypes in IntervalIndex --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/dtypes/dtypes.py | 9 ++- pandas/core/indexes/interval.py | 29 ++++++-- pandas/tests/dtypes/test_dtypes.py | 13 +++- .../tests/indexes/interval/test_interval.py | 74 ++++++++++++------- 5 files changed, 91 insertions(+), 35 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c7b1cb4379700..88d8585d8dc42 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -211,6 +211,7 @@ Other API Changes - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) +- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d1637873eb6e1..08773354d44d8 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -641,6 +641,8 @@ def __new__(cls, subtype=None): ---------- subtype : the dtype of the Interval """ + from pandas.core.dtypes.common import ( + is_categorical_dtype, is_string_dtype, pandas_dtype) if isinstance(subtype, IntervalDtype): return subtype @@ -659,7 +661,6 @@ def __new__(cls, subtype=None): if m is not None: subtype = m.group('subtype') - from pandas.core.dtypes.common import pandas_dtype try: subtype = pandas_dtype(subtype) except TypeError: @@ -670,6 +671,12 @@ def __new__(cls, subtype=None): u.subtype = None return u + if is_categorical_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ('category, object, and string subtypes are not supported ' + 'for IntervalDtype') + raise TypeError(msg) + try: return cls._cache[str(subtype)] except KeyError: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index def9b151f5c91..8550c3e9bf8f1 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -11,6 +11,8 @@ is_list_like, is_datetime_or_timedelta_dtype, is_datetime64tz_dtype, + is_categorical_dtype, + is_string_dtype, is_integer_dtype, is_float_dtype, is_interval_dtype, @@ -92,6 +94,18 @@ def _get_interval_closed_bounds(interval): return left, right +def _maybe_convert_platform_interval(data): + """ + Try to do platform conversion, with special casing for IntervalIndex + """ + if isinstance(data, (list, tuple)) and len(data) == 0: + # GH 19016 + # empty lists/tuples get object dtype by default, but this is not + # prohibited for IntervalIndex, so coerce to integer instead + return np.array([], dtype=np.intp) + return maybe_convert_platform(data) + + def _new_IntervalIndex(cls, d): """ This is called upon unpickling, rather than the default which doesn't have @@ -206,7 +220,7 @@ def __new__(cls, data, closed=None, if is_scalar(data): cls._scalar_data_error(data) - data = maybe_convert_platform(data) + data = _maybe_convert_platform_interval(data) left, right, infer_closed = intervals_to_interval_bounds(data) if _all_not_none(closed, infer_closed) and closed != infer_closed: @@ -242,6 +256,11 @@ def _simple_new(cls, left, right, closed=None, name=None, '[{rtype}] types') raise ValueError(msg.format(ltype=type(left).__name__, rtype=type(right).__name__)) + elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): + # GH 19016 + msg = ('category, object, and string subtypes are not supported ' + 'for IntervalIndex') + raise TypeError(msg) elif isinstance(left, ABCPeriodIndex): msg = 'Period dtypes are not supported, use a PeriodIndex instead' raise ValueError(msg) @@ -403,7 +422,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - breaks = maybe_convert_platform(breaks) + breaks = _maybe_convert_platform_interval(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, name=name, copy=copy) @@ -444,8 +463,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - left = maybe_convert_platform(left) - right = maybe_convert_platform(right) + left = _maybe_convert_platform_interval(left) + right = _maybe_convert_platform_interval(right) return cls._simple_new(left, right, closed, name=name, copy=copy, verify_integrity=True) @@ -493,7 +512,7 @@ def from_intervals(cls, data, name=None, copy=False): left, right, closed = data.left, data.right, data.closed name = name or data.name else: - data = maybe_convert_platform(data) + data = _maybe_convert_platform_interval(data) left, right, closed = intervals_to_interval_bounds(data) return cls.from_arrays(left, right, closed, name=name, copy=False) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index d8e16482a414e..6a3715fd66159 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -152,7 +152,7 @@ def test_update_dtype(self, dtype, new_dtype): assert result.ordered is expected_ordered @pytest.mark.parametrize('bad_dtype', [ - 'foo', object, np.int64, PeriodDtype('Q'), IntervalDtype(object)]) + 'foo', object, np.int64, PeriodDtype('Q')]) def test_update_dtype_errors(self, bad_dtype): dtype = CategoricalDtype(list('abc'), False) msg = 'a CategoricalDtype must be passed to perform an update, ' @@ -460,6 +460,17 @@ def test_construction(self): assert i.subtype == np.dtype('int64') assert is_interval_dtype(i) + @pytest.mark.parametrize('subtype', [ + CategoricalDtype(list('abc'), False), + CategoricalDtype(list('wxyz'), True), + object, str, ' Date: Wed, 3 Jan 2018 00:49:17 -0700 Subject: [PATCH 2/2] deprivatize and docstring --- pandas/core/indexes/interval.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 8550c3e9bf8f1..fd1980f9ab429 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -94,16 +94,28 @@ def _get_interval_closed_bounds(interval): return left, right -def _maybe_convert_platform_interval(data): +def maybe_convert_platform_interval(values): """ - Try to do platform conversion, with special casing for IntervalIndex + Try to do platform conversion, with special casing for IntervalIndex. + Wrapper around maybe_convert_platform that alters the default return + dtype in certain cases to be compatible with IntervalIndex. For example, + empty lists return with integer dtype instead of object dtype, which is + prohibited for IntervalIndex. + + Parameters + ---------- + values : array-like + + Returns + ------- + array """ - if isinstance(data, (list, tuple)) and len(data) == 0: + if isinstance(values, (list, tuple)) and len(values) == 0: # GH 19016 # empty lists/tuples get object dtype by default, but this is not # prohibited for IntervalIndex, so coerce to integer instead return np.array([], dtype=np.intp) - return maybe_convert_platform(data) + return maybe_convert_platform(values) def _new_IntervalIndex(cls, d): @@ -220,7 +232,7 @@ def __new__(cls, data, closed=None, if is_scalar(data): cls._scalar_data_error(data) - data = _maybe_convert_platform_interval(data) + data = maybe_convert_platform_interval(data) left, right, infer_closed = intervals_to_interval_bounds(data) if _all_not_none(closed, infer_closed) and closed != infer_closed: @@ -422,7 +434,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - breaks = _maybe_convert_platform_interval(breaks) + breaks = maybe_convert_platform_interval(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, name=name, copy=copy) @@ -463,8 +475,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of tuples """ - left = _maybe_convert_platform_interval(left) - right = _maybe_convert_platform_interval(right) + left = maybe_convert_platform_interval(left) + right = maybe_convert_platform_interval(right) return cls._simple_new(left, right, closed, name=name, copy=copy, verify_integrity=True) @@ -512,7 +524,7 @@ def from_intervals(cls, data, name=None, copy=False): left, right, closed = data.left, data.right, data.closed name = name or data.name else: - data = _maybe_convert_platform_interval(data) + data = maybe_convert_platform_interval(data) left, right, closed = intervals_to_interval_bounds(data) return cls.from_arrays(left, right, closed, name=name, copy=False)