Skip to content

Commit 856c92b

Browse files
jschendel authored and
jreback committed
API: Prohibit non-numeric dtypes in IntervalIndex (#19022)
1 parent c6166b0 commit 856c92b

File tree

5 files changed

+106
-38
lines changed

5 files changed

+106
-38
lines changed

doc/source/whatsnew/v0.23.0.txt

+4-3
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ Other API Changes
211211
- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`)
212212
- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`)
213213
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
214+
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
214215
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
215216

216217
.. _whatsnew_0230.deprecations:
@@ -279,11 +280,11 @@ Performance Improvements
279280
Documentation Changes
280281
~~~~~~~~~~~~~~~~~~~~~
281282

282-
- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`)
283+
- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`)
283284
- Consistency when introducing code samples, using either colon or period.
284285
Rewrote some sentences for greater clarity, added more dynamic references
285286
to functions, methods and classes.
286-
(:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`)
287+
(:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`)
287288
-
288289

289290
.. _whatsnew_0230.bug_fixes:
@@ -310,7 +311,7 @@ Conversion
310311
- Bug in :class:`DatetimeIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`)
311312
- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`)
312313
- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`)
313-
- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`)
314+
- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`)
314315

315316

316317
Indexing

pandas/core/dtypes/dtypes.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,8 @@ def __new__(cls, subtype=None):
641641
----------
642642
subtype : the dtype of the Interval
643643
"""
644+
from pandas.core.dtypes.common import (
645+
is_categorical_dtype, is_string_dtype, pandas_dtype)
644646

645647
if isinstance(subtype, IntervalDtype):
646648
return subtype
@@ -659,7 +661,6 @@ def __new__(cls, subtype=None):
659661
if m is not None:
660662
subtype = m.group('subtype')
661663

662-
from pandas.core.dtypes.common import pandas_dtype
663664
try:
664665
subtype = pandas_dtype(subtype)
665666
except TypeError:
@@ -670,6 +671,12 @@ def __new__(cls, subtype=None):
670671
u.subtype = None
671672
return u
672673

674+
if is_categorical_dtype(subtype) or is_string_dtype(subtype):
675+
# GH 19016
676+
msg = ('category, object, and string subtypes are not supported '
677+
'for IntervalDtype')
678+
raise TypeError(msg)
679+
673680
try:
674681
return cls._cache[str(subtype)]
675682
except KeyError:

pandas/core/indexes/interval.py

+36-5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
is_list_like,
1212
is_datetime_or_timedelta_dtype,
1313
is_datetime64tz_dtype,
14+
is_categorical_dtype,
15+
is_string_dtype,
1416
is_integer_dtype,
1517
is_float_dtype,
1618
is_interval_dtype,
@@ -92,6 +94,30 @@ def _get_interval_closed_bounds(interval):
9294
return left, right
9395

9496

97+
def maybe_convert_platform_interval(values):
98+
"""
99+
Try to do platform conversion, with special casing for IntervalIndex.
100+
Wrapper around maybe_convert_platform that alters the default return
101+
dtype in certain cases to be compatible with IntervalIndex. For example,
102+
empty lists return with integer dtype instead of object dtype, which is
103+
prohibited for IntervalIndex.
104+
105+
Parameters
106+
----------
107+
values : array-like
108+
109+
Returns
110+
-------
111+
array
112+
"""
113+
if isinstance(values, (list, tuple)) and len(values) == 0:
114+
# GH 19016
115+
# empty lists/tuples get object dtype by default, but this is
116+
# prohibited for IntervalIndex, so coerce to integer instead
117+
return np.array([], dtype=np.intp)
118+
return maybe_convert_platform(values)
119+
120+
95121
def _new_IntervalIndex(cls, d):
96122
"""
97123
This is called upon unpickling, rather than the default which doesn't have
@@ -206,7 +232,7 @@ def __new__(cls, data, closed=None,
206232
if is_scalar(data):
207233
cls._scalar_data_error(data)
208234

209-
data = maybe_convert_platform(data)
235+
data = maybe_convert_platform_interval(data)
210236
left, right, infer_closed = intervals_to_interval_bounds(data)
211237

212238
if _all_not_none(closed, infer_closed) and closed != infer_closed:
@@ -242,6 +268,11 @@ def _simple_new(cls, left, right, closed=None, name=None,
242268
'[{rtype}] types')
243269
raise ValueError(msg.format(ltype=type(left).__name__,
244270
rtype=type(right).__name__))
271+
elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
272+
# GH 19016
273+
msg = ('category, object, and string subtypes are not supported '
274+
'for IntervalIndex')
275+
raise TypeError(msg)
245276
elif isinstance(left, ABCPeriodIndex):
246277
msg = 'Period dtypes are not supported, use a PeriodIndex instead'
247278
raise ValueError(msg)
@@ -403,7 +434,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False):
403434
IntervalIndex.from_tuples : Construct an IntervalIndex from a
404435
list/array of tuples
405436
"""
406-
breaks = maybe_convert_platform(breaks)
437+
breaks = maybe_convert_platform_interval(breaks)
407438

408439
return cls.from_arrays(breaks[:-1], breaks[1:], closed,
409440
name=name, copy=copy)
@@ -444,8 +475,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False):
444475
IntervalIndex.from_tuples : Construct an IntervalIndex from a
445476
list/array of tuples
446477
"""
447-
left = maybe_convert_platform(left)
448-
right = maybe_convert_platform(right)
478+
left = maybe_convert_platform_interval(left)
479+
right = maybe_convert_platform_interval(right)
449480

450481
return cls._simple_new(left, right, closed, name=name,
451482
copy=copy, verify_integrity=True)
@@ -493,7 +524,7 @@ def from_intervals(cls, data, name=None, copy=False):
493524
left, right, closed = data.left, data.right, data.closed
494525
name = name or data.name
495526
else:
496-
data = maybe_convert_platform(data)
527+
data = maybe_convert_platform_interval(data)
497528
left, right, closed = intervals_to_interval_bounds(data)
498529
return cls.from_arrays(left, right, closed, name=name, copy=False)
499530

pandas/tests/dtypes/test_dtypes.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def test_update_dtype(self, dtype, new_dtype):
152152
assert result.ordered is expected_ordered
153153

154154
@pytest.mark.parametrize('bad_dtype', [
155-
'foo', object, np.int64, PeriodDtype('Q'), IntervalDtype(object)])
155+
'foo', object, np.int64, PeriodDtype('Q')])
156156
def test_update_dtype_errors(self, bad_dtype):
157157
dtype = CategoricalDtype(list('abc'), False)
158158
msg = 'a CategoricalDtype must be passed to perform an update, '
@@ -460,6 +460,17 @@ def test_construction(self):
460460
assert i.subtype == np.dtype('int64')
461461
assert is_interval_dtype(i)
462462

463+
@pytest.mark.parametrize('subtype', [
464+
CategoricalDtype(list('abc'), False),
465+
CategoricalDtype(list('wxyz'), True),
466+
object, str, '<U10', 'interval[category]', 'interval[object]'])
467+
def test_construction_not_supported(self, subtype):
468+
# GH 19016
469+
msg = ('category, object, and string subtypes are not supported '
470+
'for IntervalDtype')
471+
with tm.assert_raises_regex(TypeError, msg):
472+
IntervalDtype(subtype)
473+
463474
def test_construction_generic(self):
464475
# generic
465476
i = IntervalDtype('interval')

pandas/tests/indexes/interval/test_interval.py

+46-28
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
from pandas import (
66
Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp,
7-
Timedelta, date_range, timedelta_range)
7+
Timedelta, date_range, timedelta_range, Categorical)
88
from pandas.compat import lzip
99
from pandas.core.common import _asarray_tuplesafe
1010
from pandas.tests.indexes.common import Base
@@ -42,7 +42,6 @@ def create_index_with_nan(self, closed='right'):
4242

4343
@pytest.mark.parametrize('data', [
4444
Index([0, 1, 2, 3, 4]),
45-
Index(list('abcde')),
4645
date_range('2017-01-01', periods=5),
4746
date_range('2017-01-01', periods=5, tz='US/Eastern'),
4847
timedelta_range('1 day', periods=5)])
@@ -138,10 +137,10 @@ def test_constructors_nan(self, closed, data):
138137
[],
139138
np.array([], dtype='int64'),
140139
np.array([], dtype='float64'),
141-
np.array([], dtype=object)])
140+
np.array([], dtype='datetime64[ns]')])
142141
def test_constructors_empty(self, data, closed):
143142
# GH 18421
144-
expected_dtype = data.dtype if isinstance(data, np.ndarray) else object
143+
expected_dtype = getattr(data, 'dtype', np.intp)
145144
expected_values = np.array([], dtype=object)
146145
expected_index = IntervalIndex(data, closed=closed)
147146

@@ -223,6 +222,48 @@ def test_constructors_errors(self):
223222
with tm.assert_raises_regex(ValueError, msg):
224223
IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))
225224

225+
# GH 19016: categorical data
226+
data = Categorical(list('01234abcde'), ordered=True)
227+
msg = ('category, object, and string subtypes are not supported '
228+
'for IntervalIndex')
229+
230+
with tm.assert_raises_regex(TypeError, msg):
231+
IntervalIndex.from_breaks(data)
232+
233+
with tm.assert_raises_regex(TypeError, msg):
234+
IntervalIndex.from_arrays(data[:-1], data[1:])
235+
236+
@pytest.mark.parametrize('data', [
237+
tuple('0123456789'),
238+
list('abcdefghij'),
239+
np.array(list('abcdefghij'), dtype=object),
240+
np.array(list('abcdefghij'), dtype='<U1')])
241+
def test_constructors_errors_string(self, data):
242+
# GH 19016
243+
left, right = data[:-1], data[1:]
244+
tuples = lzip(left, right)
245+
ivs = [Interval(l, r) for l, r in tuples] or data
246+
msg = ('category, object, and string subtypes are not supported '
247+
'for IntervalIndex')
248+
249+
with tm.assert_raises_regex(TypeError, msg):
250+
IntervalIndex(ivs)
251+
252+
with tm.assert_raises_regex(TypeError, msg):
253+
Index(ivs)
254+
255+
with tm.assert_raises_regex(TypeError, msg):
256+
IntervalIndex.from_intervals(ivs)
257+
258+
with tm.assert_raises_regex(TypeError, msg):
259+
IntervalIndex.from_breaks(data)
260+
261+
with tm.assert_raises_regex(TypeError, msg):
262+
IntervalIndex.from_arrays(left, right)
263+
264+
with tm.assert_raises_regex(TypeError, msg):
265+
IntervalIndex.from_tuples(tuples)
266+
226267
@pytest.mark.parametrize('tz_left, tz_right', [
227268
(None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')])
228269
def test_constructors_errors_tz(self, tz_left, tz_right):
@@ -298,18 +339,6 @@ def test_length(self, closed, breaks):
298339
expected = Index(iv.length if notna(iv) else iv for iv in index)
299340
tm.assert_index_equal(result, expected)
300341

301-
@pytest.mark.parametrize('breaks', [
302-
list('abcdefgh'),
303-
lzip(range(10), range(1, 11)),
304-
[['A', 'B'], ['a', 'b'], ['c', 'd'], ['e', 'f']],
305-
[Interval(0, 1), Interval(1, 2), Interval(3, 4), Interval(4, 5)]])
306-
def test_length_errors(self, closed, breaks):
307-
# GH 18789
308-
index = IntervalIndex.from_breaks(breaks)
309-
msg = 'IntervalIndex contains Intervals without defined length'
310-
with tm.assert_raises_regex(TypeError, msg):
311-
index.length
312-
313342
def test_with_nans(self, closed):
314343
index = self.create_index(closed=closed)
315344
assert not index.hasnans
@@ -428,9 +457,7 @@ def test_delete(self, closed):
428457
interval_range(0, periods=10, closed='neither'),
429458
interval_range(1.7, periods=8, freq=2.5, closed='both'),
430459
interval_range(Timestamp('20170101'), periods=12, closed='left'),
431-
interval_range(Timedelta('1 day'), periods=6, closed='right'),
432-
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
433-
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
460+
interval_range(Timedelta('1 day'), periods=6, closed='right')])
434461
def test_insert(self, data):
435462
item = data[0]
436463
idx_item = IntervalIndex([item])
@@ -504,15 +531,6 @@ def test_unique(self, closed):
504531
[(0, 1), (0, 1), (2, 3)], closed=closed)
505532
assert not idx.is_unique
506533

507-
# unique mixed
508-
idx = IntervalIndex.from_tuples([(0, 1), ('a', 'b')], closed=closed)
509-
assert idx.is_unique
510-
511-
# duplicate mixed
512-
idx = IntervalIndex.from_tuples(
513-
[(0, 1), ('a', 'b'), (0, 1)], closed=closed)
514-
assert not idx.is_unique
515-
516534
# empty
517535
idx = IntervalIndex([], closed=closed)
518536
assert idx.is_unique

0 commit comments

Comments
 (0)