Skip to content

Commit de4b384

Browse files
jschendel authored and jreback committed
BUG: Fix IntervalIndex constructor inconsistencies (#18424)
1 parent 154c416 commit de4b384

File tree

5 files changed

+134
-57
lines changed

5 files changed

+134
-57
lines changed

doc/source/whatsnew/v0.22.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ Other API Changes
7474
- `tseries.frequencies.get_freq_group()` and `tseries.frequencies.DAYS` are removed from the public API (:issue:`18034`)
7575
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
7676
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
77+
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
7778

7879

7980
.. _whatsnew_0220.deprecations:
@@ -137,6 +138,7 @@ Indexing
137138
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
138139
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
139140
- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`)
141+
- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`)
140142
-
141143

142144
I/O

pandas/_libs/interval.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,8 @@ cpdef intervals_to_interval_bounds(ndarray intervals):
211211
int64_t n = len(intervals)
212212
ndarray left, right
213213

214-
left = np.empty(n, dtype=object)
215-
right = np.empty(n, dtype=object)
214+
left = np.empty(n, dtype=intervals.dtype)
215+
right = np.empty(n, dtype=intervals.dtype)
216216

217217
for i in range(len(intervals)):
218218
interval = intervals[i]

pandas/core/indexes/interval.py

+39-18
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas.core.dtypes.missing import notna, isna
66
from pandas.core.dtypes.generic import ABCPeriodIndex
77
from pandas.core.dtypes.dtypes import IntervalDtype
8+
from pandas.core.dtypes.cast import maybe_convert_platform
89
from pandas.core.dtypes.common import (
910
_ensure_platform_int,
1011
is_list_like,
@@ -31,7 +32,9 @@
3132
from pandas.core.indexes.timedeltas import timedelta_range
3233
from pandas.core.indexes.multi import MultiIndex
3334
from pandas.compat.numpy import function as nv
34-
from pandas.core import common as com
35+
from pandas.core.common import (
36+
_all_not_none, _any_none, _asarray_tuplesafe, _count_not_none,
37+
is_bool_indexer, _maybe_box_datetimelike, _not_none)
3538
from pandas.util._decorators import cache_readonly, Appender
3639
from pandas.core.config import get_option
3740
from pandas.tseries.frequencies import to_offset
@@ -176,7 +179,7 @@ class IntervalIndex(IntervalMixin, Index):
176179

177180
_mask = None
178181

179-
def __new__(cls, data, closed='right',
182+
def __new__(cls, data, closed=None,
180183
name=None, copy=False, dtype=None,
181184
fastpath=False, verify_integrity=True):
182185

@@ -197,8 +200,17 @@ def __new__(cls, data, closed='right',
197200
if is_scalar(data):
198201
cls._scalar_data_error(data)
199202

200-
data = IntervalIndex.from_intervals(data, name=name)
201-
left, right, closed = data.left, data.right, data.closed
203+
data = maybe_convert_platform(data)
204+
left, right, infer_closed = intervals_to_interval_bounds(data)
205+
206+
if _all_not_none(closed, infer_closed) and closed != infer_closed:
207+
# GH 18421
208+
msg = ("conflicting values for closed: constructor got "
209+
"'{closed}', inferred from data '{infer_closed}'"
210+
.format(closed=closed, infer_closed=infer_closed))
211+
raise ValueError(msg)
212+
213+
closed = closed or infer_closed
202214

203215
return cls._simple_new(left, right, closed, name,
204216
copy=copy, verify_integrity=verify_integrity)
@@ -376,7 +388,8 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False):
376388
IntervalIndex.from_tuples : Construct an IntervalIndex from a
377389
list/array of tuples
378390
"""
379-
breaks = np.asarray(breaks)
391+
breaks = maybe_convert_platform(breaks)
392+
380393
return cls.from_arrays(breaks[:-1], breaks[1:], closed,
381394
name=name, copy=copy)
382395

@@ -416,8 +429,9 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False):
416429
IntervalIndex.from_tuples : Construct an IntervalIndex from a
417430
list/array of tuples
418431
"""
419-
left = np.asarray(left)
420-
right = np.asarray(right)
432+
left = maybe_convert_platform(left)
433+
right = maybe_convert_platform(right)
434+
421435
return cls._simple_new(left, right, closed, name=name,
422436
copy=copy, verify_integrity=True)
423437

@@ -460,8 +474,12 @@ def from_intervals(cls, data, name=None, copy=False):
460474
IntervalIndex.from_tuples : Construct an IntervalIndex from a
461475
list/array of tuples
462476
"""
463-
data = np.asarray(data)
464-
left, right, closed = intervals_to_interval_bounds(data)
477+
if isinstance(data, IntervalIndex):
478+
left, right, closed = data.left, data.right, data.closed
479+
name = name or data.name
480+
else:
481+
data = maybe_convert_platform(data)
482+
left, right, closed = intervals_to_interval_bounds(data)
465483
return cls.from_arrays(left, right, closed, name=name, copy=False)
466484

467485
@classmethod
@@ -497,8 +515,11 @@ def from_tuples(cls, data, closed='right', name=None, copy=False):
497515
IntervalIndex.from_intervals : Construct an IntervalIndex from an array
498516
of Interval objects
499517
"""
500-
left = []
501-
right = []
518+
if len(data):
519+
left, right = [], []
520+
else:
521+
left = right = data
522+
502523
for d in data:
503524

504525
if isna(d):
@@ -517,7 +538,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False):
517538
return cls.from_arrays(left, right, closed, name=name, copy=False)
518539

519540
def to_tuples(self):
520-
return Index(com._asarray_tuplesafe(zip(self.left, self.right)))
541+
return Index(_asarray_tuplesafe(zip(self.left, self.right)))
521542

522543
@cache_readonly
523544
def _multiindex(self):
@@ -838,7 +859,7 @@ def get_loc(self, key, method=None):
838859
return self._engine.get_loc(key)
839860

840861
def get_value(self, series, key):
841-
if com.is_bool_indexer(key):
862+
if is_bool_indexer(key):
842863
loc = key
843864
elif is_list_like(key):
844865
loc = self.get_indexer(key)
@@ -1166,7 +1187,7 @@ def _is_type_compatible(a, b):
11661187
return ((is_number(a) and is_number(b)) or
11671188
(is_ts_compat(a) and is_ts_compat(b)) or
11681189
(is_td_compat(a) and is_td_compat(b)) or
1169-
com._any_none(a, b))
1190+
_any_none(a, b))
11701191

11711192

11721193
def interval_range(start=None, end=None, periods=None, freq=None,
@@ -1244,13 +1265,13 @@ def interval_range(start=None, end=None, periods=None, freq=None,
12441265
--------
12451266
IntervalIndex : an Index of intervals that are all closed on the same side.
12461267
"""
1247-
if com._count_not_none(start, end, periods) != 2:
1268+
if _count_not_none(start, end, periods) != 2:
12481269
raise ValueError('Of the three parameters: start, end, and periods, '
12491270
'exactly two must be specified')
12501271

1251-
start = com._maybe_box_datetimelike(start)
1252-
end = com._maybe_box_datetimelike(end)
1253-
endpoint = next(com._not_none(start, end))
1272+
start = _maybe_box_datetimelike(start)
1273+
end = _maybe_box_datetimelike(end)
1274+
endpoint = next(_not_none(start, end))
12541275

12551276
if not _is_valid_endpoint(start):
12561277
msg = 'start must be numeric or datetime-like, got {start}'

pandas/tests/indexes/test_interval.py

+90-36
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pandas import (Interval, IntervalIndex, Index, isna,
77
interval_range, Timestamp, Timedelta,
88
compat, date_range, timedelta_range, DateOffset)
9-
from pandas.compat import zip
9+
from pandas.compat import lzip
1010
from pandas.tseries.offsets import Day
1111
from pandas._libs.interval import IntervalTree
1212
from pandas.tests.indexes.common import Base
@@ -38,7 +38,7 @@ def create_index_with_nan(self, closed='right'):
3838
@pytest.mark.parametrize('name', [None, 'foo'])
3939
def test_constructors(self, closed, name):
4040
left, right = Index([0, 1, 2, 3]), Index([1, 2, 3, 4])
41-
ivs = [Interval(l, r, closed=closed) for l, r in zip(left, right)]
41+
ivs = [Interval(l, r, closed=closed) for l, r in lzip(left, right)]
4242
expected = IntervalIndex._simple_new(
4343
left=left, right=right, closed=closed, name=name)
4444

@@ -57,7 +57,7 @@ def test_constructors(self, closed, name):
5757
tm.assert_index_equal(result, expected)
5858

5959
result = IntervalIndex.from_tuples(
60-
zip(left, right), closed=closed, name=name)
60+
lzip(left, right), closed=closed, name=name)
6161
tm.assert_index_equal(result, expected)
6262

6363
result = Index(ivs, name=name)
@@ -68,6 +68,9 @@ def test_constructors(self, closed, name):
6868
tm.assert_index_equal(Index(expected), expected)
6969
tm.assert_index_equal(IntervalIndex(expected), expected)
7070

71+
result = IntervalIndex.from_intervals(expected)
72+
tm.assert_index_equal(result, expected)
73+
7174
result = IntervalIndex.from_intervals(
7275
expected.values, name=expected.name)
7376
tm.assert_index_equal(result, expected)
@@ -86,63 +89,118 @@ def test_constructors(self, closed, name):
8689
breaks, closed=expected.closed, name=expected.name)
8790
tm.assert_index_equal(result, expected)
8891

89-
def test_constructors_other(self):
90-
91-
# all-nan
92-
result = IntervalIndex.from_intervals([np.nan])
93-
expected = np.array([np.nan], dtype=object)
94-
tm.assert_numpy_array_equal(result.values, expected)
95-
96-
# empty
97-
result = IntervalIndex.from_intervals([])
98-
expected = np.array([], dtype=object)
99-
tm.assert_numpy_array_equal(result.values, expected)
92+
@pytest.mark.parametrize('data', [[np.nan], [np.nan] * 2, [np.nan] * 50])
93+
def test_constructors_nan(self, closed, data):
94+
# GH 18421
95+
expected_values = np.array(data, dtype=object)
96+
expected_idx = IntervalIndex(data, closed=closed)
97+
98+
# validate the expected index
99+
assert expected_idx.closed == closed
100+
tm.assert_numpy_array_equal(expected_idx.values, expected_values)
101+
102+
result = IntervalIndex.from_tuples(data, closed=closed)
103+
tm.assert_index_equal(result, expected_idx)
104+
tm.assert_numpy_array_equal(result.values, expected_values)
105+
106+
result = IntervalIndex.from_breaks([np.nan] + data, closed=closed)
107+
tm.assert_index_equal(result, expected_idx)
108+
tm.assert_numpy_array_equal(result.values, expected_values)
109+
110+
result = IntervalIndex.from_arrays(data, data, closed=closed)
111+
tm.assert_index_equal(result, expected_idx)
112+
tm.assert_numpy_array_equal(result.values, expected_values)
113+
114+
if closed == 'right':
115+
# Can't specify closed for IntervalIndex.from_intervals
116+
result = IntervalIndex.from_intervals(data)
117+
tm.assert_index_equal(result, expected_idx)
118+
tm.assert_numpy_array_equal(result.values, expected_values)
119+
120+
@pytest.mark.parametrize('data', [
121+
[],
122+
np.array([], dtype='int64'),
123+
np.array([], dtype='float64'),
124+
np.array([], dtype=object)])
125+
def test_constructors_empty(self, data, closed):
126+
# GH 18421
127+
expected_dtype = data.dtype if isinstance(data, np.ndarray) else object
128+
expected_values = np.array([], dtype=object)
129+
expected_index = IntervalIndex(data, closed=closed)
130+
131+
# validate the expected index
132+
assert expected_index.empty
133+
assert expected_index.closed == closed
134+
assert expected_index.dtype.subtype == expected_dtype
135+
tm.assert_numpy_array_equal(expected_index.values, expected_values)
136+
137+
result = IntervalIndex.from_tuples(data, closed=closed)
138+
tm.assert_index_equal(result, expected_index)
139+
tm.assert_numpy_array_equal(result.values, expected_values)
140+
141+
result = IntervalIndex.from_breaks(data, closed=closed)
142+
tm.assert_index_equal(result, expected_index)
143+
tm.assert_numpy_array_equal(result.values, expected_values)
144+
145+
result = IntervalIndex.from_arrays(data, data, closed=closed)
146+
tm.assert_index_equal(result, expected_index)
147+
tm.assert_numpy_array_equal(result.values, expected_values)
148+
149+
if closed == 'right':
150+
# Can't specify closed for IntervalIndex.from_intervals
151+
result = IntervalIndex.from_intervals(data)
152+
tm.assert_index_equal(result, expected_index)
153+
tm.assert_numpy_array_equal(result.values, expected_values)
100154

101155
def test_constructors_errors(self):
102156

103157
# scalar
104-
msg = ('IntervalIndex(...) must be called with a collection of '
158+
msg = ('IntervalIndex\(...\) must be called with a collection of '
105159
'some kind, 5 was passed')
106-
with pytest.raises(TypeError, message=msg):
160+
with tm.assert_raises_regex(TypeError, msg):
107161
IntervalIndex(5)
108162

109163
# not an interval
110-
msg = "type <class 'numpy.int32'> with value 0 is not an interval"
111-
with pytest.raises(TypeError, message=msg):
164+
msg = ("type <(class|type) 'numpy.int64'> with value 0 "
165+
"is not an interval")
166+
with tm.assert_raises_regex(TypeError, msg):
112167
IntervalIndex([0, 1])
113168

114-
with pytest.raises(TypeError, message=msg):
169+
with tm.assert_raises_regex(TypeError, msg):
115170
IntervalIndex.from_intervals([0, 1])
116171

117172
# invalid closed
118173
msg = "invalid options for 'closed': invalid"
119-
with pytest.raises(ValueError, message=msg):
174+
with tm.assert_raises_regex(ValueError, msg):
120175
IntervalIndex.from_arrays([0, 1], [1, 2], closed='invalid')
121176

122-
# mismatched closed
177+
# mismatched closed within intervals
123178
msg = 'intervals must all be closed on the same side'
124-
with pytest.raises(ValueError, message=msg):
179+
with tm.assert_raises_regex(ValueError, msg):
125180
IntervalIndex.from_intervals([Interval(0, 1),
126181
Interval(1, 2, closed='left')])
127182

128-
with pytest.raises(ValueError, message=msg):
129-
IntervalIndex.from_arrays([0, 10], [3, 5])
130-
131-
with pytest.raises(ValueError, message=msg):
183+
with tm.assert_raises_regex(ValueError, msg):
132184
Index([Interval(0, 1), Interval(2, 3, closed='left')])
133185

186+
# mismatched closed inferred from intervals vs constructor.
187+
msg = 'conflicting values for closed'
188+
with tm.assert_raises_regex(ValueError, msg):
189+
iv = [Interval(0, 1, closed='both'), Interval(1, 2, closed='both')]
190+
IntervalIndex(iv, closed='neither')
191+
134192
# no point in nesting periods in an IntervalIndex
135193
msg = 'Period dtypes are not supported, use a PeriodIndex instead'
136-
with pytest.raises(ValueError, message=msg):
194+
with tm.assert_raises_regex(ValueError, msg):
137195
IntervalIndex.from_breaks(
138196
pd.period_range('2000-01-01', periods=3))
139197

140198
# decreasing breaks/arrays
141199
msg = 'left side of interval must be <= right side'
142-
with pytest.raises(ValueError, message=msg):
200+
with tm.assert_raises_regex(ValueError, msg):
143201
IntervalIndex.from_breaks(range(10, -1, -1))
144202

145-
with pytest.raises(ValueError, message=msg):
203+
with tm.assert_raises_regex(ValueError, msg):
146204
IntervalIndex.from_arrays(range(10, -1, -1), range(9, -2, -1))
147205

148206
def test_constructors_datetimelike(self, closed):
@@ -865,23 +923,23 @@ def test_is_non_overlapping_monotonic(self, closed):
865923
idx = IntervalIndex.from_tuples(tpls, closed=closed)
866924
assert idx.is_non_overlapping_monotonic is True
867925

868-
idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
926+
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
869927
assert idx.is_non_overlapping_monotonic is True
870928

871929
# Should be False in all cases (overlapping)
872930
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
873931
idx = IntervalIndex.from_tuples(tpls, closed=closed)
874932
assert idx.is_non_overlapping_monotonic is False
875933

876-
idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
934+
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
877935
assert idx.is_non_overlapping_monotonic is False
878936

879937
# Should be False in all cases (non-monotonic)
880938
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
881939
idx = IntervalIndex.from_tuples(tpls, closed=closed)
882940
assert idx.is_non_overlapping_monotonic is False
883941

884-
idx = IntervalIndex.from_tuples(reversed(tpls), closed=closed)
942+
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
885943
assert idx.is_non_overlapping_monotonic is False
886944

887945
# Should be False for closed='both', otherwise True (GH16560)
@@ -1054,10 +1112,6 @@ def test_constructor_coverage(self):
10541112
end=end.to_pydatetime())
10551113
tm.assert_index_equal(result, expected)
10561114

1057-
result = pd.interval_range(start=start.tz_localize('UTC'),
1058-
end=end.tz_localize('UTC'))
1059-
tm.assert_index_equal(result, expected)
1060-
10611115
result = pd.interval_range(start=start.asm8, end=end.asm8)
10621116
tm.assert_index_equal(result, expected)
10631117

pandas/tests/indexing/test_interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_getitem_with_scalar(self):
5454
def test_nonoverlapping_monotonic(self, direction, closed):
5555
tpls = [(0, 1), (2, 3), (4, 5)]
5656
if direction == 'decreasing':
57-
tpls = reversed(tpls)
57+
tpls = tpls[::-1]
5858

5959
idx = IntervalIndex.from_tuples(tpls, closed=closed)
6060
s = Series(list('abc'), idx)

0 commit comments

Comments
 (0)