Skip to content

Commit 9fceb51

Browse files
toobaz authored and gfyoung committed
BUG: Try to sort result of Index.union rather than guessing sortability
closes #17376
1 parent 7c14e4f commit 9fceb51

File tree

4 files changed

+33
-73
lines changed

4 files changed

+33
-73
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -920,6 +920,7 @@ Numeric
920920
Indexing
921921
^^^^^^^^
922922

923+
- Bug in the order of the result of ``Index.union()`` when indexes contain tuples (:issue:`17376`)
923924
- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`)
924925
- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`)
925926
- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`)

pandas/core/indexes/base.py

+7 -21
Original file line number | Diff line number | Diff line change
@@ -2406,35 +2406,21 @@ def union(self, other):
24062406
value_set = set(lvals)
24072407
result.extend([x for x in rvals if x not in value_set])
24082408
else:
2409-
indexer = self.get_indexer(other)
2410-
indexer, = (indexer == -1).nonzero()
2411-
2409+
indexer = np.where(self.get_indexer(other) == -1)[0]
24122410
if len(indexer) > 0:
24132411
other_diff = algos.take_nd(rvals, indexer,
24142412
allow_fill=False)
24152413
result = _concat._concat_compat((lvals, other_diff))
24162414

2417-
try:
2418-
lvals[0] < other_diff[0]
2419-
except TypeError as e:
2420-
warnings.warn("%s, sort order is undefined for "
2421-
"incomparable objects" % e, RuntimeWarning,
2422-
stacklevel=3)
2423-
else:
2424-
types = frozenset((self.inferred_type,
2425-
other.inferred_type))
2426-
if not types & _unsortable_types:
2427-
result.sort()
2428-
24292415
else:
24302416
result = lvals
24312417

2432-
try:
2433-
result = np.sort(result)
2434-
except TypeError as e:
2435-
warnings.warn("%s, sort order is undefined for "
2436-
"incomparable objects" % e, RuntimeWarning,
2437-
stacklevel=3)
2418+
try:
2419+
result = sorting.safe_sort(result)
2420+
except TypeError as e:
2421+
warnings.warn("%s, sort order is undefined for "
2422+
"incomparable objects" % e, RuntimeWarning,
2423+
stacklevel=3)
24382424

24392425
# for subclasses
24402426
return self._wrap_union_result(other, result)

pandas/tests/indexes/test_base.py

+20 -31
Original file line number | Diff line number | Diff line change
@@ -784,8 +784,7 @@ def test_union(self):
784784
expected = Index(list('ab'), name='A')
785785
tm.assert_index_equal(union, expected)
786786

787-
with tm.assert_produces_warning(RuntimeWarning):
788-
firstCat = self.strIndex.union(self.dateIndex)
787+
firstCat = self.strIndex.union(self.dateIndex)
789788
secondCat = self.strIndex.union(self.strIndex)
790789

791790
if self.dateIndex.dtype == np.object_:
@@ -1462,19 +1461,19 @@ def test_tuple_union_bug(self):
14621461
(2, 'B'), (1, 'C'), (2, 'C')],
14631462
dtype=[('num', int), ('let', 'a1')])
14641463

1465-
idx1 = pandas.Index(aidx1)
1466-
idx2 = pandas.Index(aidx2)
1464+
idx1 = Index(aidx1)
1465+
idx2 = Index(aidx2)
14671466

1468-
# intersection broken?
1467+
# intersection
14691468
int_idx = idx1.intersection(idx2)
1469+
expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2)))
14701470
# needs to be 1d like idx1 and idx2
1471-
expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
14721471
assert int_idx.ndim == 1
14731472
tm.assert_index_equal(int_idx, expected)
14741473

1475-
# union broken
1474+
# GH 17376 (union)
14761475
union_idx = idx1.union(idx2)
1477-
expected = idx2
1476+
expected = idx2.sort_values()
14781477
assert union_idx.ndim == 1
14791478
tm.assert_index_equal(union_idx, expected)
14801479

@@ -1664,13 +1663,19 @@ def test_outer_join_sort(self):
16641663
left_idx = Index(np.random.permutation(15))
16651664
right_idx = tm.makeDateIndex(10)
16661665

1667-
with tm.assert_produces_warning(RuntimeWarning):
1666+
if PY3:
1667+
with tm.assert_produces_warning(RuntimeWarning):
1668+
joined = left_idx.join(right_idx, how='outer')
1669+
else:
16681670
joined = left_idx.join(right_idx, how='outer')
16691671

16701672
# right_idx in this case because DatetimeIndex has join precedence over
16711673
# Int64Index
1672-
with tm.assert_produces_warning(RuntimeWarning):
1673-
expected = right_idx.astype(object).union(left_idx.astype(object))
1674+
if PY3:
1675+
with tm.assert_produces_warning(RuntimeWarning):
1676+
expected = right_idx.astype(object).union(left_idx)
1677+
else:
1678+
expected = right_idx.astype(object).union(left_idx)
16741679
tm.assert_index_equal(joined, expected)
16751680

16761681
def test_nan_first_take_datetime(self):
@@ -2059,10 +2064,7 @@ def test_copy_name(self):
20592064
s1 = Series(2, index=first)
20602065
s2 = Series(3, index=second[:-1])
20612066

2062-
warning_type = RuntimeWarning if PY3 else None
2063-
with tm.assert_produces_warning(warning_type):
2064-
# Python 3: Unorderable types
2065-
s3 = s1 * s2
2067+
s3 = s1 * s2
20662068

20672069
assert s3.index.name == 'mario'
20682070

@@ -2095,27 +2097,14 @@ def test_union_base(self):
20952097
first = idx[3:]
20962098
second = idx[:5]
20972099

2098-
if PY3:
2099-
with tm.assert_produces_warning(RuntimeWarning):
2100-
# unorderable types
2101-
result = first.union(second)
2102-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2103-
tm.assert_index_equal(result, expected)
2104-
else:
2105-
result = first.union(second)
2106-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2107-
tm.assert_index_equal(result, expected)
2100+
expected = Index([0, 1, 2, 'a', 'b', 'c'])
2101+
result = first.union(second)
2102+
tm.assert_index_equal(result, expected)
21082103

21092104
# GH 10149
21102105
cases = [klass(second.values)
21112106
for klass in [np.array, Series, list]]
21122107
for case in cases:
2113-
if PY3:
2114-
with tm.assert_produces_warning(RuntimeWarning):
2115-
# unorderable types
2116-
result = first.union(case)
2117-
assert tm.equalContents(result, idx)
2118-
else:
21192108
result = first.union(case)
21202109
assert tm.equalContents(result, idx)
21212110

pandas/tests/series/test_operators.py

+5 -21
Original file line number | Diff line number | Diff line change
@@ -431,11 +431,7 @@ def test_comparison_label_based(self):
431431
assert_series_equal(result, a[a])
432432

433433
for e in [Series(['z'])]:
434-
if compat.PY3:
435-
with tm.assert_produces_warning(RuntimeWarning):
436-
result = a[a | e]
437-
else:
438-
result = a[a | e]
434+
result = a[a | e]
439435
assert_series_equal(result, a[a])
440436

441437
# vs scalars
@@ -1472,24 +1468,12 @@ def test_operators_bitwise(self):
14721468
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])
14731469

14741470
# s_0123 will be all false now because of reindexing like s_tft
1475-
if compat.PY3:
1476-
# unable to sort incompatible object via .union.
1477-
exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
1478-
with tm.assert_produces_warning(RuntimeWarning):
1479-
assert_series_equal(s_tft & s_0123, exp)
1480-
else:
1481-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1482-
assert_series_equal(s_tft & s_0123, exp)
1471+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1472+
assert_series_equal(s_tft & s_0123, exp)
14831473

14841474
# s_tft will be all false now because of reindexing like s_0123
1485-
if compat.PY3:
1486-
# unable to sort incompatible object via .union.
1487-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
1488-
with tm.assert_produces_warning(RuntimeWarning):
1489-
assert_series_equal(s_0123 & s_tft, exp)
1490-
else:
1491-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1492-
assert_series_equal(s_0123 & s_tft, exp)
1475+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1476+
assert_series_equal(s_0123 & s_tft, exp)
14931477

14941478
assert_series_equal(s_0123 & False, Series([False] * 4))
14951479
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))

0 commit comments

Comments
 (0)