Skip to content

Commit 5853b79

Browse files
jschendel authored and jreback committed
BUG: IntervalIndex set op bugs for empty results (pandas-dev#19112)
1 parent 8912efc commit 5853b79

File tree

3 files changed

+76
-6
lines changed

3 files changed

+76
-6
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -407,6 +407,7 @@ Indexing
407407
- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`)
408408
- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`)
409409
- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`)
410+
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
410411

411412
I/O
412413
^^^

pandas/core/indexes/interval.py

+19 -2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from pandas.core.dtypes.missing import notna, isna
66
from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex
77
from pandas.core.dtypes.dtypes import IntervalDtype
8-
from pandas.core.dtypes.cast import maybe_convert_platform
8+
from pandas.core.dtypes.cast import maybe_convert_platform, find_common_type
99
from pandas.core.dtypes.common import (
1010
_ensure_platform_int,
1111
is_list_like,
@@ -16,6 +16,7 @@
1616
is_integer_dtype,
1717
is_float_dtype,
1818
is_interval_dtype,
19+
is_object_dtype,
1920
is_scalar,
2021
is_float,
2122
is_number,
@@ -1289,9 +1290,25 @@ def func(self, other):
12891290
msg = ('can only do set operations between two IntervalIndex '
12901291
'objects that are closed on the same side')
12911292
other = self._as_like_interval_index(other, msg)
1293+
1294+
# GH 19016: ensure set op will not return a prohibited dtype
1295+
subtypes = [self.dtype.subtype, other.dtype.subtype]
1296+
common_subtype = find_common_type(subtypes)
1297+
if is_object_dtype(common_subtype):
1298+
msg = ('can only do {op} between two IntervalIndex '
1299+
'objects that have compatible dtypes')
1300+
raise TypeError(msg.format(op=op_name))
1301+
12921302
result = getattr(self._multiindex, op_name)(other._multiindex)
12931303
result_name = self.name if self.name == other.name else None
1294-
return type(self).from_tuples(result.values, closed=self.closed,
1304+
1305+
# GH 19101: ensure empty results have correct dtype
1306+
if result.empty:
1307+
result = result.values.astype(self.dtype.subtype)
1308+
else:
1309+
result = result.values
1310+
1311+
return type(self).from_tuples(result, closed=self.closed,
12951312
name=result_name)
12961313
return func
12971314

pandas/tests/indexes/interval/test_interval.py

+56 -4
Original file line number | Diff line number | Diff line change
@@ -880,6 +880,16 @@ def test_union(self, closed):
880880
tm.assert_index_equal(index.union(index), index)
881881
tm.assert_index_equal(index.union(index[:1]), index)
882882

883+
# GH 19101: empty result, same dtype
884+
index = IntervalIndex(np.array([], dtype='int64'), closed=closed)
885+
result = index.union(index)
886+
tm.assert_index_equal(result, index)
887+
888+
# GH 19101: empty result, different dtypes
889+
other = IntervalIndex(np.array([], dtype='float64'), closed=closed)
890+
result = index.union(other)
891+
tm.assert_index_equal(result, index)
892+
883893
def test_intersection(self, closed):
884894
index = self.create_index(closed=closed)
885895
other = IntervalIndex.from_breaks(range(5, 13), closed=closed)
@@ -893,14 +903,48 @@ def test_intersection(self, closed):
893903

894904
tm.assert_index_equal(index.intersection(index), index)
895905

906+
# GH 19101: empty result, same dtype
907+
other = IntervalIndex.from_breaks(range(300, 314), closed=closed)
908+
expected = IntervalIndex(np.array([], dtype='int64'), closed=closed)
909+
result = index.intersection(other)
910+
tm.assert_index_equal(result, expected)
911+
912+
# GH 19101: empty result, different dtypes
913+
breaks = np.arange(300, 314, dtype='float64')
914+
other = IntervalIndex.from_breaks(breaks, closed=closed)
915+
result = index.intersection(other)
916+
tm.assert_index_equal(result, expected)
917+
896918
def test_difference(self, closed):
897919
index = self.create_index(closed=closed)
898920
tm.assert_index_equal(index.difference(index[:1]), index[1:])
899921

922+
# GH 19101: empty result, same dtype
923+
result = index.difference(index)
924+
expected = IntervalIndex(np.array([], dtype='int64'), closed=closed)
925+
tm.assert_index_equal(result, expected)
926+
927+
# GH 19101: empty result, different dtypes
928+
other = IntervalIndex.from_arrays(index.left.astype('float64'),
929+
index.right, closed=closed)
930+
result = index.difference(other)
931+
tm.assert_index_equal(result, expected)
932+
900933
def test_symmetric_difference(self, closed):
901-
idx = self.create_index(closed=closed)
902-
result = idx[1:].symmetric_difference(idx[:-1])
903-
expected = IntervalIndex([idx[0], idx[-1]])
934+
index = self.create_index(closed=closed)
935+
result = index[1:].symmetric_difference(index[:-1])
936+
expected = IntervalIndex([index[0], index[-1]])
937+
tm.assert_index_equal(result, expected)
938+
939+
# GH 19101: empty result, same dtype
940+
result = index.symmetric_difference(index)
941+
expected = IntervalIndex(np.array([], dtype='int64'), closed=closed)
942+
tm.assert_index_equal(result, expected)
943+
944+
# GH 19101: empty result, different dtypes
945+
other = IntervalIndex.from_arrays(index.left.astype('float64'),
946+
index.right, closed=closed)
947+
result = index.symmetric_difference(other)
904948
tm.assert_index_equal(result, expected)
905949

906950
@pytest.mark.parametrize('op_name', [
@@ -909,17 +953,25 @@ def test_set_operation_errors(self, closed, op_name):
909953
index = self.create_index(closed=closed)
910954
set_op = getattr(index, op_name)
911955

912-
# test errors
956+
# non-IntervalIndex
913957
msg = ('can only do set operations between two IntervalIndex objects '
914958
'that are closed on the same side')
915959
with tm.assert_raises_regex(ValueError, msg):
916960
set_op(Index([1, 2, 3]))
917961

962+
# mixed closed
918963
for other_closed in {'right', 'left', 'both', 'neither'} - {closed}:
919964
other = self.create_index(closed=other_closed)
920965
with tm.assert_raises_regex(ValueError, msg):
921966
set_op(other)
922967

968+
# GH 19016: incompatible dtypes
969+
other = interval_range(Timestamp('20180101'), periods=9, closed=closed)
970+
msg = ('can only do {op} between two IntervalIndex objects that have '
971+
'compatible dtypes').format(op=op_name)
972+
with tm.assert_raises_regex(TypeError, msg):
973+
set_op(other)
974+
923975
def test_isin(self, closed):
924976
index = self.create_index(closed=closed)
925977

0 commit comments

Comments
 (0)