Skip to content

Commit c2a9126

Browse files
committed
API: Prohibit non-numeric dtypes in IntervalIndex
1 parent c19bdc9 commit c2a9126

File tree

5 files changed

+91
-35
lines changed

5 files changed

+91
-35
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ Other API Changes
208208
- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`)
209209
- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`)
210210
- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions; they will now show a ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`)
211+
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
211212

212213
.. _whatsnew_0230.deprecations:
213214

pandas/core/dtypes/dtypes.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,8 @@ def __new__(cls, subtype=None):
641641
----------
642642
subtype : the dtype of the Interval
643643
"""
644+
from pandas.core.dtypes.common import (
645+
is_categorical_dtype, is_string_dtype, pandas_dtype)
644646

645647
if isinstance(subtype, IntervalDtype):
646648
return subtype
@@ -659,7 +661,6 @@ def __new__(cls, subtype=None):
659661
if m is not None:
660662
subtype = m.group('subtype')
661663

662-
from pandas.core.dtypes.common import pandas_dtype
663664
try:
664665
subtype = pandas_dtype(subtype)
665666
except TypeError:
@@ -670,6 +671,12 @@ def __new__(cls, subtype=None):
670671
u.subtype = None
671672
return u
672673

674+
if is_categorical_dtype(subtype) or is_string_dtype(subtype):
675+
# GH 19016
676+
msg = ('category, object, and string subtypes are not supported '
677+
'for IntervalDtype')
678+
raise TypeError(msg)
679+
673680
try:
674681
return cls._cache[str(subtype)]
675682
except KeyError:

pandas/core/indexes/interval.py

+24-5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
is_list_like,
1212
is_datetime_or_timedelta_dtype,
1313
is_datetime64tz_dtype,
14+
is_categorical_dtype,
15+
is_string_dtype,
1416
is_integer_dtype,
1517
is_float_dtype,
1618
is_interval_dtype,
@@ -92,6 +94,18 @@ def _get_interval_closed_bounds(interval):
9294
return left, right
9395

9496

97+
def _maybe_convert_platform_interval(data):
98+
"""
99+
Try to do platform conversion, with special casing for IntervalIndex
100+
"""
101+
if isinstance(data, (list, tuple)) and len(data) == 0:
102+
# GH 19016
103+
# empty lists/tuples get object dtype by default, but this is now
104+
# prohibited for IntervalIndex, so coerce to integer instead
105+
return np.array([], dtype=np.intp)
106+
return maybe_convert_platform(data)
107+
108+
95109
def _new_IntervalIndex(cls, d):
96110
"""
97111
This is called upon unpickling, rather than the default which doesn't have
@@ -206,7 +220,7 @@ def __new__(cls, data, closed=None,
206220
if is_scalar(data):
207221
cls._scalar_data_error(data)
208222

209-
data = maybe_convert_platform(data)
223+
data = _maybe_convert_platform_interval(data)
210224
left, right, infer_closed = intervals_to_interval_bounds(data)
211225

212226
if _all_not_none(closed, infer_closed) and closed != infer_closed:
@@ -242,6 +256,11 @@ def _simple_new(cls, left, right, closed=None, name=None,
242256
'[{rtype}] types')
243257
raise ValueError(msg.format(ltype=type(left).__name__,
244258
rtype=type(right).__name__))
259+
elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
260+
# GH 19016
261+
msg = ('category, object, and string subtypes are not supported '
262+
'for IntervalIndex')
263+
raise TypeError(msg)
245264
elif isinstance(left, ABCPeriodIndex):
246265
msg = 'Period dtypes are not supported, use a PeriodIndex instead'
247266
raise ValueError(msg)
@@ -403,7 +422,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False):
403422
IntervalIndex.from_tuples : Construct an IntervalIndex from a
404423
list/array of tuples
405424
"""
406-
breaks = maybe_convert_platform(breaks)
425+
breaks = _maybe_convert_platform_interval(breaks)
407426

408427
return cls.from_arrays(breaks[:-1], breaks[1:], closed,
409428
name=name, copy=copy)
@@ -444,8 +463,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False):
444463
IntervalIndex.from_tuples : Construct an IntervalIndex from a
445464
list/array of tuples
446465
"""
447-
left = maybe_convert_platform(left)
448-
right = maybe_convert_platform(right)
466+
left = _maybe_convert_platform_interval(left)
467+
right = _maybe_convert_platform_interval(right)
449468

450469
return cls._simple_new(left, right, closed, name=name,
451470
copy=copy, verify_integrity=True)
@@ -493,7 +512,7 @@ def from_intervals(cls, data, name=None, copy=False):
493512
left, right, closed = data.left, data.right, data.closed
494513
name = name or data.name
495514
else:
496-
data = maybe_convert_platform(data)
515+
data = _maybe_convert_platform_interval(data)
497516
left, right, closed = intervals_to_interval_bounds(data)
498517
return cls.from_arrays(left, right, closed, name=name, copy=False)
499518

pandas/tests/dtypes/test_dtypes.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def test_update_dtype(self, dtype, new_dtype):
152152
assert result.ordered is expected_ordered
153153

154154
@pytest.mark.parametrize('bad_dtype', [
155-
'foo', object, np.int64, PeriodDtype('Q'), IntervalDtype(object)])
155+
'foo', object, np.int64, PeriodDtype('Q')])
156156
def test_update_dtype_errors(self, bad_dtype):
157157
dtype = CategoricalDtype(list('abc'), False)
158158
msg = 'a CategoricalDtype must be passed to perform an update, '
@@ -460,6 +460,17 @@ def test_construction(self):
460460
assert i.subtype == np.dtype('int64')
461461
assert is_interval_dtype(i)
462462

463+
@pytest.mark.parametrize('subtype', [
464+
CategoricalDtype(list('abc'), False),
465+
CategoricalDtype(list('wxyz'), True),
466+
object, str, '<U10', 'interval[category]', 'interval[object]'])
467+
def test_construction_not_supported(self, subtype):
468+
# GH 19016
469+
msg = ('category, object, and string subtypes are not supported '
470+
'for IntervalDtype')
471+
with tm.assert_raises_regex(TypeError, msg):
472+
IntervalDtype(subtype)
473+
463474
def test_construction_generic(self):
464475
# generic
465476
i = IntervalDtype('interval')

pandas/tests/indexes/interval/test_interval.py

+46-28
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
from pandas import (
66
Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp,
7-
Timedelta, date_range, timedelta_range)
7+
Timedelta, date_range, timedelta_range, Categorical)
88
from pandas.compat import lzip
99
from pandas.core.common import _asarray_tuplesafe
1010
from pandas.tests.indexes.common import Base
@@ -42,7 +42,6 @@ def create_index_with_nan(self, closed='right'):
4242

4343
@pytest.mark.parametrize('data', [
4444
Index([0, 1, 2, 3, 4]),
45-
Index(list('abcde')),
4645
date_range('2017-01-01', periods=5),
4746
date_range('2017-01-01', periods=5, tz='US/Eastern'),
4847
timedelta_range('1 day', periods=5)])
@@ -138,10 +137,10 @@ def test_constructors_nan(self, closed, data):
138137
[],
139138
np.array([], dtype='int64'),
140139
np.array([], dtype='float64'),
141-
np.array([], dtype=object)])
140+
np.array([], dtype='datetime64[ns]')])
142141
def test_constructors_empty(self, data, closed):
143142
# GH 18421
144-
expected_dtype = data.dtype if isinstance(data, np.ndarray) else object
143+
expected_dtype = getattr(data, 'dtype', np.intp)
145144
expected_values = np.array([], dtype=object)
146145
expected_index = IntervalIndex(data, closed=closed)
147146

@@ -223,6 +222,48 @@ def test_constructors_errors(self):
223222
with tm.assert_raises_regex(ValueError, msg):
224223
IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))
225224

225+
# GH 19016: categorical data
226+
data = Categorical(list('01234abcde'), ordered=True)
227+
msg = ('category, object, and string subtypes are not supported '
228+
'for IntervalIndex')
229+
230+
with tm.assert_raises_regex(TypeError, msg):
231+
IntervalIndex.from_breaks(data)
232+
233+
with tm.assert_raises_regex(TypeError, msg):
234+
IntervalIndex.from_arrays(data[:-1], data[1:])
235+
236+
@pytest.mark.parametrize('data', [
237+
tuple('0123456789'),
238+
list('abcdefghij'),
239+
np.array(list('abcdefghij'), dtype=object),
240+
np.array(list('abcdefghij'), dtype='<U1')])
241+
def test_constructors_errors_string(self, data):
242+
# GH 19016
243+
left, right = data[:-1], data[1:]
244+
tuples = lzip(left, right)
245+
ivs = [Interval(l, r) for l, r in tuples] or data
246+
msg = ('category, object, and string subtypes are not supported '
247+
'for IntervalIndex')
248+
249+
with tm.assert_raises_regex(TypeError, msg):
250+
IntervalIndex(ivs)
251+
252+
with tm.assert_raises_regex(TypeError, msg):
253+
Index(ivs)
254+
255+
with tm.assert_raises_regex(TypeError, msg):
256+
IntervalIndex.from_intervals(ivs)
257+
258+
with tm.assert_raises_regex(TypeError, msg):
259+
IntervalIndex.from_breaks(data)
260+
261+
with tm.assert_raises_regex(TypeError, msg):
262+
IntervalIndex.from_arrays(left, right)
263+
264+
with tm.assert_raises_regex(TypeError, msg):
265+
IntervalIndex.from_tuples(tuples)
266+
226267
@pytest.mark.parametrize('tz_left, tz_right', [
227268
(None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')])
228269
def test_constructors_errors_tz(self, tz_left, tz_right):
@@ -298,18 +339,6 @@ def test_length(self, closed, breaks):
298339
expected = Index(iv.length if notna(iv) else iv for iv in index)
299340
tm.assert_index_equal(result, expected)
300341

301-
@pytest.mark.parametrize('breaks', [
302-
list('abcdefgh'),
303-
lzip(range(10), range(1, 11)),
304-
[['A', 'B'], ['a', 'b'], ['c', 'd'], ['e', 'f']],
305-
[Interval(0, 1), Interval(1, 2), Interval(3, 4), Interval(4, 5)]])
306-
def test_length_errors(self, closed, breaks):
307-
# GH 18789
308-
index = IntervalIndex.from_breaks(breaks)
309-
msg = 'IntervalIndex contains Intervals without defined length'
310-
with tm.assert_raises_regex(TypeError, msg):
311-
index.length
312-
313342
def test_with_nans(self, closed):
314343
index = self.create_index(closed=closed)
315344
assert not index.hasnans
@@ -428,9 +457,7 @@ def test_delete(self, closed):
428457
interval_range(0, periods=10, closed='neither'),
429458
interval_range(1.7, periods=8, freq=2.5, closed='both'),
430459
interval_range(Timestamp('20170101'), periods=12, closed='left'),
431-
interval_range(Timedelta('1 day'), periods=6, closed='right'),
432-
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
433-
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
460+
interval_range(Timedelta('1 day'), periods=6, closed='right')])
434461
def test_insert(self, data):
435462
item = data[0]
436463
idx_item = IntervalIndex([item])
@@ -504,15 +531,6 @@ def test_unique(self, closed):
504531
[(0, 1), (0, 1), (2, 3)], closed=closed)
505532
assert not idx.is_unique
506533

507-
# unique mixed
508-
idx = IntervalIndex.from_tuples([(0, 1), ('a', 'b')], closed=closed)
509-
assert idx.is_unique
510-
511-
# duplicate mixed
512-
idx = IntervalIndex.from_tuples(
513-
[(0, 1), ('a', 'b'), (0, 1)], closed=closed)
514-
assert not idx.is_unique
515-
516534
# empty
517535
idx = IntervalIndex([], closed=closed)
518536
assert idx.is_unique

0 commit comments

Comments
 (0)