-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API: Prohibit non-numeric dtypes in IntervalIndex #19022
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,8 @@ | |
is_list_like, | ||
is_datetime_or_timedelta_dtype, | ||
is_datetime64tz_dtype, | ||
is_categorical_dtype, | ||
is_string_dtype, | ||
is_integer_dtype, | ||
is_float_dtype, | ||
is_interval_dtype, | ||
|
@@ -92,6 +94,30 @@ def _get_interval_closed_bounds(interval): | |
return left, right | ||
|
||
|
||
def maybe_convert_platform_interval(values): | ||
""" | ||
Try to do platform conversion, with special casing for IntervalIndex. | ||
Wrapper around maybe_convert_platform that alters the default return | ||
dtype in certain cases to be compatible with IntervalIndex. For example, | ||
empty lists return with integer dtype instead of object dtype, which is | ||
prohibited for IntervalIndex. | ||
|
||
Parameters | ||
---------- | ||
values : array-like | ||
|
||
Returns | ||
------- | ||
array | ||
""" | ||
if isinstance(values, (list, tuple)) and len(values) == 0: | ||
# GH 19016 | ||
# empty lists/tuples get object dtype by default, but this is not | ||
# prohibited for IntervalIndex, so coerce to integer instead | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment here refers to the existing behavior of constructing an In [2]: pd.IntervalIndex([])
Out[2]:
IntervalIndex([]
closed='right',
dtype='interval[object]') The changes in this PR caused this to raise, since the inferred subtype was object, which is now being disallowed. Changed this to be instead be integer by default. Don't have any strong reason for choosing integer, so can change. |
||
return np.array([], dtype=np.intp) | ||
return maybe_convert_platform(values) | ||
|
||
|
||
def _new_IntervalIndex(cls, d): | ||
""" | ||
This is called upon unpickling, rather than the default which doesn't have | ||
|
@@ -206,7 +232,7 @@ def __new__(cls, data, closed=None, | |
if is_scalar(data): | ||
cls._scalar_data_error(data) | ||
|
||
data = maybe_convert_platform(data) | ||
data = maybe_convert_platform_interval(data) | ||
left, right, infer_closed = intervals_to_interval_bounds(data) | ||
|
||
if _all_not_none(closed, infer_closed) and closed != infer_closed: | ||
|
@@ -242,6 +268,11 @@ def _simple_new(cls, left, right, closed=None, name=None, | |
'[{rtype}] types') | ||
raise ValueError(msg.format(ltype=type(left).__name__, | ||
rtype=type(right).__name__)) | ||
elif is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): | ||
# GH 19016 | ||
msg = ('category, object, and string subtypes are not supported ' | ||
'for IntervalIndex') | ||
raise TypeError(msg) | ||
elif isinstance(left, ABCPeriodIndex): | ||
msg = 'Period dtypes are not supported, use a PeriodIndex instead' | ||
raise ValueError(msg) | ||
|
@@ -403,7 +434,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): | |
IntervalIndex.from_tuples : Construct an IntervalIndex from a | ||
list/array of tuples | ||
""" | ||
breaks = maybe_convert_platform(breaks) | ||
breaks = maybe_convert_platform_interval(breaks) | ||
|
||
return cls.from_arrays(breaks[:-1], breaks[1:], closed, | ||
name=name, copy=copy) | ||
|
@@ -444,8 +475,8 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): | |
IntervalIndex.from_tuples : Construct an IntervalIndex from a | ||
list/array of tuples | ||
""" | ||
left = maybe_convert_platform(left) | ||
right = maybe_convert_platform(right) | ||
left = maybe_convert_platform_interval(left) | ||
right = maybe_convert_platform_interval(right) | ||
|
||
return cls._simple_new(left, right, closed, name=name, | ||
copy=copy, verify_integrity=True) | ||
|
@@ -493,7 +524,7 @@ def from_intervals(cls, data, name=None, copy=False): | |
left, right, closed = data.left, data.right, data.closed | ||
name = name or data.name | ||
else: | ||
data = maybe_convert_platform(data) | ||
data = maybe_convert_platform_interval(data) | ||
left, right, closed = intervals_to_interval_bounds(data) | ||
return cls.from_arrays(left, right, closed, name=name, copy=False) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
import numpy as np | ||
from pandas import ( | ||
Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe your test for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
Timedelta, date_range, timedelta_range) | ||
Timedelta, date_range, timedelta_range, Categorical) | ||
from pandas.compat import lzip | ||
from pandas.core.common import _asarray_tuplesafe | ||
from pandas.tests.indexes.common import Base | ||
|
@@ -42,7 +42,6 @@ def create_index_with_nan(self, closed='right'): | |
|
||
@pytest.mark.parametrize('data', [ | ||
Index([0, 1, 2, 3, 4]), | ||
Index(list('abcde')), | ||
date_range('2017-01-01', periods=5), | ||
date_range('2017-01-01', periods=5, tz='US/Eastern'), | ||
timedelta_range('1 day', periods=5)]) | ||
|
@@ -138,10 +137,10 @@ def test_constructors_nan(self, closed, data): | |
[], | ||
np.array([], dtype='int64'), | ||
np.array([], dtype='float64'), | ||
np.array([], dtype=object)]) | ||
np.array([], dtype='datetime64[ns]')]) | ||
def test_constructors_empty(self, data, closed): | ||
# GH 18421 | ||
expected_dtype = data.dtype if isinstance(data, np.ndarray) else object | ||
expected_dtype = getattr(data, 'dtype', np.intp) | ||
expected_values = np.array([], dtype=object) | ||
expected_index = IntervalIndex(data, closed=closed) | ||
|
||
|
@@ -223,6 +222,48 @@ def test_constructors_errors(self): | |
with tm.assert_raises_regex(ValueError, msg): | ||
IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1)) | ||
|
||
# GH 19016: categorical data | ||
data = Categorical(list('01234abcde'), ordered=True) | ||
msg = ('category, object, and string subtypes are not supported ' | ||
'for IntervalIndex') | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
IntervalIndex.from_breaks(data) | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
IntervalIndex.from_arrays(data[:-1], data[1:]) | ||
|
||
@pytest.mark.parametrize('data', [ | ||
tuple('0123456789'), | ||
list('abcdefghij'), | ||
np.array(list('abcdefghij'), dtype=object), | ||
np.array(list('abcdefghij'), dtype='<U1')]) | ||
def test_constructors_errors_string(self, data): | ||
# GH 19016 | ||
left, right = data[:-1], data[1:] | ||
tuples = lzip(left, right) | ||
ivs = [Interval(l, r) for l, r in tuples] or data | ||
msg = ('category, object, and string subtypes are not supported ' | ||
'for IntervalIndex') | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
IntervalIndex(ivs) | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
Index(ivs) | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
IntervalIndex.from_intervals(ivs) | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
IntervalIndex.from_breaks(data) | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
IntervalIndex.from_arrays(left, right) | ||
|
||
with tm.assert_raises_regex(TypeError, msg): | ||
IntervalIndex.from_tuples(tuples) | ||
|
||
@pytest.mark.parametrize('tz_left, tz_right', [ | ||
(None, 'UTC'), ('UTC', None), ('UTC', 'US/Eastern')]) | ||
def test_constructors_errors_tz(self, tz_left, tz_right): | ||
|
@@ -298,18 +339,6 @@ def test_length(self, closed, breaks): | |
expected = Index(iv.length if notna(iv) else iv for iv in index) | ||
tm.assert_index_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('breaks', [ | ||
list('abcdefgh'), | ||
lzip(range(10), range(1, 11)), | ||
[['A', 'B'], ['a', 'b'], ['c', 'd'], ['e', 'f']], | ||
[Interval(0, 1), Interval(1, 2), Interval(3, 4), Interval(4, 5)]]) | ||
def test_length_errors(self, closed, breaks): | ||
# GH 18789 | ||
index = IntervalIndex.from_breaks(breaks) | ||
msg = 'IntervalIndex contains Intervals without defined length' | ||
with tm.assert_raises_regex(TypeError, msg): | ||
index.length | ||
|
||
def test_with_nans(self, closed): | ||
index = self.create_index(closed=closed) | ||
assert not index.hasnans | ||
|
@@ -428,9 +457,7 @@ def test_delete(self, closed): | |
interval_range(0, periods=10, closed='neither'), | ||
interval_range(1.7, periods=8, freq=2.5, closed='both'), | ||
interval_range(Timestamp('20170101'), periods=12, closed='left'), | ||
interval_range(Timedelta('1 day'), periods=6, closed='right'), | ||
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]), | ||
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])]) | ||
interval_range(Timedelta('1 day'), periods=6, closed='right')]) | ||
def test_insert(self, data): | ||
item = data[0] | ||
idx_item = IntervalIndex([item]) | ||
|
@@ -504,15 +531,6 @@ def test_unique(self, closed): | |
[(0, 1), (0, 1), (2, 3)], closed=closed) | ||
assert not idx.is_unique | ||
|
||
# unique mixed | ||
idx = IntervalIndex.from_tuples([(0, 1), ('a', 'b')], closed=closed) | ||
assert idx.is_unique | ||
|
||
# duplicate mixed | ||
idx = IntervalIndex.from_tuples( | ||
[(0, 1), ('a', 'b'), (0, 1)], closed=closed) | ||
assert not idx.is_unique | ||
|
||
# empty | ||
idx = IntervalIndex([], closed=closed) | ||
assert idx.is_unique | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you add a doc-string, you can de-privatize (no leading _)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done