Skip to content

Commit 39edcd3

Browse files
committed
BUG: Index.difference of itself doesn't preserve type
1 parent b669112 commit 39edcd3

File tree

4 files changed

+32
-4
lines changed

4 files changed

+32
-4
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,7 @@ Indexing
932932
- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`)
933933
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
934934
- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`)
935+
- Bug in :meth:`Index.difference` where taking the difference of an ``Index`` with itself did not preserve the type of the index (:issue:`20040`)
935936

936937

937938
MultiIndex

pandas/core/indexes/base.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs):
458458
Must be careful not to recurse.
459459
"""
460460
if not hasattr(values, 'dtype'):
461-
if values is None and dtype is not None:
461+
if (values is None or len(values) == 0) and dtype is not None:
462462
values = np.empty(0, dtype=dtype)
463463
else:
464464
values = np.array(values, copy=False)
@@ -492,6 +492,8 @@ def _shallow_copy(self, values=None, **kwargs):
492492
values = self.values
493493
attributes = self._get_attributes_dict()
494494
attributes.update(kwargs)
495+
if len(values) == 0 and 'dtype' not in kwargs:
496+
attributes['dtype'] = self.dtype
495497
return self._simple_new(values, **attributes)
496498

497499
def _shallow_copy_with_infer(self, values=None, **kwargs):
@@ -512,6 +514,8 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
512514
attributes = self._get_attributes_dict()
513515
attributes.update(kwargs)
514516
attributes['copy'] = False
517+
if len(values) == 0 and 'dtype' not in kwargs:
518+
attributes['dtype'] = self.dtype
515519
if self._infer_as_myclass:
516520
try:
517521
return self._constructor(values, **attributes)
@@ -2511,7 +2515,7 @@ def difference(self, other):
25112515
self._assert_can_do_setop(other)
25122516

25132517
if self.equals(other):
2514-
return Index([], name=self.name)
2518+
return self._shallow_copy([])
25152519

25162520
other, result_name = self._convert_can_do_setop(other)
25172521

pandas/core/indexes/multi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2733,7 +2733,7 @@ def intersection(self, other):
27332733
other_tuples = other._ndarray_values
27342734
uniq_tuples = sorted(set(self_tuples) & set(other_tuples))
27352735
if len(uniq_tuples) == 0:
2736-
return MultiIndex(levels=[[]] * self.nlevels,
2736+
return MultiIndex(levels=self.levels,
27372737
labels=[[]] * self.nlevels,
27382738
names=result_names, verify_integrity=False)
27392739
else:
@@ -2755,7 +2755,7 @@ def difference(self, other):
27552755
return self
27562756

27572757
if self.equals(other):
2758-
return MultiIndex(levels=[[]] * self.nlevels,
2758+
return MultiIndex(levels=self.levels,
27592759
labels=[[]] * self.nlevels,
27602760
names=result_names, verify_integrity=False)
27612761

pandas/tests/indexes/test_base.py

+23
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,29 @@ def test_symmetric_difference(self):
10341034
assert tm.equalContents(result, expected)
10351035
assert result.name == 'new_name'
10361036

1037+
def test_difference_type(self):
1038+
# GH 20040
1039+
# If taking difference of a set and itself, it
1040+
# needs to preserve the type of the index
1041+
skip_index_keys = ['repeats']
1042+
for key, id in self.indices.items():
1043+
if key not in skip_index_keys:
1044+
result = id.difference(id)
1045+
expected = id.drop(id)
1046+
tm.assert_index_equal(result, expected)
1047+
1048+
def test_intersection_difference(self):
1049+
# GH 20040
1050+
# Test that the intersection of an index with an
1051+
# empty index produces the same index as the difference
1052+
# of an index with itself. Test for all types
1053+
skip_index_keys = ['repeats']
1054+
for key, id in self.indices.items():
1055+
if key not in skip_index_keys:
1056+
inter = id.intersection(id.drop(id))
1057+
diff = id.difference(id)
1058+
tm.assert_index_equal(inter, diff)
1059+
10371060
def test_is_numeric(self):
10381061
assert not self.dateIndex.is_numeric()
10391062
assert not self.strIndex.is_numeric()

0 commit comments

Comments
 (0)