Skip to content

Commit ddad5c0

Browse files
jbrockmendel authored and JulianWgs committed
BUG: MultiIndex.difference incorrectly raising TypeError when indexes contain non-sortable entries (pandas-dev#41915)
1 parent 3093551 commit ddad5c0

File tree

4 files changed

+13
-36
lines changed

4 files changed

+13
-36
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1042,6 +1042,7 @@ MultiIndex
10421042
- Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in the result (:issue:`38623`)
10431043
- Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when the :class:`MultiIndex` contained ``NaN`` even when they are differently ordered (:issue:`38439`)
10441044
- Bug in :meth:`MultiIndex.intersection` always returning an empty result when intersecting with :class:`CategoricalIndex` (:issue:`38653`)
1045+
- Bug in :meth:`MultiIndex.difference` incorrectly raising ``TypeError`` when indexes contain non-sortable entries (:issue:`41915`)
10451046
- Bug in :meth:`MultiIndex.reindex` raising a ``ValueError`` when used on an empty :class:`MultiIndex` and indexing only a specific level (:issue:`41170`)
10461047
- Bug in :meth:`MultiIndex.reindex` raising ``TypeError`` when reindexing against a flat :class:`Index` (:issue:`41707`)
10471048

pandas/core/indexes/base.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -3091,7 +3091,7 @@ def intersection(self, other, sort=False):
30913091
return this.intersection(other, sort=sort)
30923092

30933093
result = self._intersection(other, sort=sort)
3094-
return self._wrap_setop_result(other, result)
3094+
return self._wrap_intersection_result(other, result)
30953095

30963096
def _intersection(self, other: Index, sort=False):
30973097
"""
@@ -3113,6 +3113,10 @@ def _intersection(self, other: Index, sort=False):
31133113
res_values = _maybe_try_sort(res_values, sort)
31143114
return res_values
31153115

3116+
def _wrap_intersection_result(self, other, result):
3117+
# We will override for MultiIndex to handle empty results
3118+
return self._wrap_setop_result(other, result)
3119+
31163120
def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike:
31173121
"""
31183122
Find the intersection of two Indexes using get_indexer.

pandas/core/indexes/multi.py

+4-31
Original file line number | Diff line number | Diff line change
@@ -3577,50 +3577,23 @@ def _maybe_match_names(self, other):
35773577
names.append(None)
35783578
return names
35793579

3580-
def _intersection(self, other, sort=False) -> MultiIndex:
3580+
def _wrap_intersection_result(self, other, result):
35813581
other, result_names = self._convert_can_do_setop(other)
3582-
other = other.astype(object, copy=False)
35833582

3584-
uniq_tuples = None # flag whether _inner_indexer was successful
3585-
if self.is_monotonic and other.is_monotonic:
3586-
try:
3587-
inner_tuples = self._inner_indexer(other)[0]
3588-
sort = False # inner_tuples is already sorted
3589-
except TypeError:
3590-
pass
3591-
else:
3592-
uniq_tuples = algos.unique(inner_tuples)
3593-
3594-
if uniq_tuples is None:
3595-
uniq_tuples = self._intersection_via_get_indexer(other, sort)
3596-
3597-
if sort is None:
3598-
uniq_tuples = sorted(uniq_tuples)
3599-
3600-
if len(uniq_tuples) == 0:
3583+
if len(result) == 0:
36013584
return MultiIndex(
36023585
levels=self.levels,
36033586
codes=[[]] * self.nlevels,
36043587
names=result_names,
36053588
verify_integrity=False,
36063589
)
36073590
else:
3608-
return MultiIndex.from_arrays(
3609-
zip(*uniq_tuples), sortorder=0, names=result_names
3610-
)
3591+
return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names)
36113592

36123593
def _difference(self, other, sort) -> MultiIndex:
36133594
other, result_names = self._convert_can_do_setop(other)
36143595

3615-
this = self._get_unique_index()
3616-
3617-
indexer = this.get_indexer(other)
3618-
indexer = indexer.take((indexer != -1).nonzero()[0])
3619-
3620-
label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
3621-
difference = this._values.take(label_diff)
3622-
if sort is None:
3623-
difference = sorted(difference)
3596+
difference = super()._difference(other, sort)
36243597

36253598
if len(difference) == 0:
36263599
return MultiIndex(

pandas/tests/indexes/multi/test_setops.py

+3-4
Original file line number | Diff line number | Diff line change
@@ -216,11 +216,10 @@ def test_difference_sort_incomparable():
216216

217217
other = MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]])
218218
# sort=None, the default
219-
# MultiIndex.difference deviates here from other difference
220-
# implementations in not catching the TypeError
221-
msg = "'<' not supported between instances of 'Timestamp' and 'int'"
222-
with pytest.raises(TypeError, match=msg):
219+
msg = "sort order is undefined for incomparable objects"
220+
with tm.assert_produces_warning(RuntimeWarning, match=msg):
223221
result = idx.difference(other)
222+
tm.assert_index_equal(result, idx)
224223

225224
# sort=False
226225
result = idx.difference(other, sort=False)

0 commit comments

Comments
 (0)