Skip to content

Commit 204cfc5

Browse files
toobaz authored and gfyoung committed
BUG: Try to sort result of Index.union rather than guessing sortability
closes #17376
1 parent 1d73cf3 commit 204cfc5

File tree

4 files changed

+33
-73
lines changed

4 files changed

+33
-73
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,7 @@ Numeric
920920
Indexing
921921
^^^^^^^^
922922

923+
- Bug in the order of the result of ``Index.union()`` when indexes contain tuples (:issue:`17376`)
923924
- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`)
924925
- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`)
925926
- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`)

pandas/core/indexes/base.py

+7-21
Original file line numberDiff line numberDiff line change
@@ -2380,35 +2380,21 @@ def union(self, other):
23802380
value_set = set(lvals)
23812381
result.extend([x for x in rvals if x not in value_set])
23822382
else:
2383-
indexer = self.get_indexer(other)
2384-
indexer, = (indexer == -1).nonzero()
2385-
2383+
indexer = np.where(self.get_indexer(other) == -1)[0]
23862384
if len(indexer) > 0:
23872385
other_diff = algos.take_nd(rvals, indexer,
23882386
allow_fill=False)
23892387
result = _concat._concat_compat((lvals, other_diff))
23902388

2391-
try:
2392-
lvals[0] < other_diff[0]
2393-
except TypeError as e:
2394-
warnings.warn("%s, sort order is undefined for "
2395-
"incomparable objects" % e, RuntimeWarning,
2396-
stacklevel=3)
2397-
else:
2398-
types = frozenset((self.inferred_type,
2399-
other.inferred_type))
2400-
if not types & _unsortable_types:
2401-
result.sort()
2402-
24032389
else:
24042390
result = lvals
24052391

2406-
try:
2407-
result = np.sort(result)
2408-
except TypeError as e:
2409-
warnings.warn("%s, sort order is undefined for "
2410-
"incomparable objects" % e, RuntimeWarning,
2411-
stacklevel=3)
2392+
try:
2393+
result = sorting.safe_sort(result)
2394+
except TypeError as e:
2395+
warnings.warn("%s, sort order is undefined for "
2396+
"incomparable objects" % e, RuntimeWarning,
2397+
stacklevel=3)
24122398

24132399
# for subclasses
24142400
return self._wrap_union_result(other, result)

pandas/tests/indexes/test_base.py

+20-31
Original file line numberDiff line numberDiff line change
@@ -784,8 +784,7 @@ def test_union(self):
784784
expected = Index(list('ab'), name='A')
785785
tm.assert_index_equal(union, expected)
786786

787-
with tm.assert_produces_warning(RuntimeWarning):
788-
firstCat = self.strIndex.union(self.dateIndex)
787+
firstCat = self.strIndex.union(self.dateIndex)
789788
secondCat = self.strIndex.union(self.strIndex)
790789

791790
if self.dateIndex.dtype == np.object_:
@@ -1462,19 +1461,19 @@ def test_tuple_union_bug(self):
14621461
(2, 'B'), (1, 'C'), (2, 'C')],
14631462
dtype=[('num', int), ('let', 'a1')])
14641463

1465-
idx1 = pandas.Index(aidx1)
1466-
idx2 = pandas.Index(aidx2)
1464+
idx1 = Index(aidx1)
1465+
idx2 = Index(aidx2)
14671466

1468-
# intersection broken?
1467+
# intersection
14691468
int_idx = idx1.intersection(idx2)
1469+
expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2)))
14701470
# needs to be 1d like idx1 and idx2
1471-
expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
14721471
assert int_idx.ndim == 1
14731472
tm.assert_index_equal(int_idx, expected)
14741473

1475-
# union broken
1474+
# GH 17376 (union)
14761475
union_idx = idx1.union(idx2)
1477-
expected = idx2
1476+
expected = idx2.sort_values()
14781477
assert union_idx.ndim == 1
14791478
tm.assert_index_equal(union_idx, expected)
14801479

@@ -1664,13 +1663,19 @@ def test_outer_join_sort(self):
16641663
left_idx = Index(np.random.permutation(15))
16651664
right_idx = tm.makeDateIndex(10)
16661665

1667-
with tm.assert_produces_warning(RuntimeWarning):
1666+
if PY3:
1667+
with tm.assert_produces_warning(RuntimeWarning):
1668+
joined = left_idx.join(right_idx, how='outer')
1669+
else:
16681670
joined = left_idx.join(right_idx, how='outer')
16691671

16701672
# right_idx in this case because DatetimeIndex has join precedence over
16711673
# Int64Index
1672-
with tm.assert_produces_warning(RuntimeWarning):
1673-
expected = right_idx.astype(object).union(left_idx.astype(object))
1674+
if PY3:
1675+
with tm.assert_produces_warning(RuntimeWarning):
1676+
expected = right_idx.astype(object).union(left_idx)
1677+
else:
1678+
expected = right_idx.astype(object).union(left_idx)
16741679
tm.assert_index_equal(joined, expected)
16751680

16761681
def test_nan_first_take_datetime(self):
@@ -2059,10 +2064,7 @@ def test_copy_name(self):
20592064
s1 = Series(2, index=first)
20602065
s2 = Series(3, index=second[:-1])
20612066

2062-
warning_type = RuntimeWarning if PY3 else None
2063-
with tm.assert_produces_warning(warning_type):
2064-
# Python 3: Unorderable types
2065-
s3 = s1 * s2
2067+
s3 = s1 * s2
20662068

20672069
assert s3.index.name == 'mario'
20682070

@@ -2095,27 +2097,14 @@ def test_union_base(self):
20952097
first = idx[3:]
20962098
second = idx[:5]
20972099

2098-
if PY3:
2099-
with tm.assert_produces_warning(RuntimeWarning):
2100-
# unorderable types
2101-
result = first.union(second)
2102-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2103-
tm.assert_index_equal(result, expected)
2104-
else:
2105-
result = first.union(second)
2106-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2107-
tm.assert_index_equal(result, expected)
2100+
expected = Index([0, 1, 2, 'a', 'b', 'c'])
2101+
result = first.union(second)
2102+
tm.assert_index_equal(result, expected)
21082103

21092104
# GH 10149
21102105
cases = [klass(second.values)
21112106
for klass in [np.array, Series, list]]
21122107
for case in cases:
2113-
if PY3:
2114-
with tm.assert_produces_warning(RuntimeWarning):
2115-
# unorderable types
2116-
result = first.union(case)
2117-
assert tm.equalContents(result, idx)
2118-
else:
21192108
result = first.union(case)
21202109
assert tm.equalContents(result, idx)
21212110

pandas/tests/series/test_operators.py

+5-21
Original file line numberDiff line numberDiff line change
@@ -431,11 +431,7 @@ def test_comparison_label_based(self):
431431
assert_series_equal(result, a[a])
432432

433433
for e in [Series(['z'])]:
434-
if compat.PY3:
435-
with tm.assert_produces_warning(RuntimeWarning):
436-
result = a[a | e]
437-
else:
438-
result = a[a | e]
434+
result = a[a | e]
439435
assert_series_equal(result, a[a])
440436

441437
# vs scalars
@@ -1472,24 +1468,12 @@ def test_operators_bitwise(self):
14721468
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])
14731469

14741470
# s_0123 will be all false now because of reindexing like s_tft
1475-
if compat.PY3:
1476-
# unable to sort incompatible object via .union.
1477-
exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
1478-
with tm.assert_produces_warning(RuntimeWarning):
1479-
assert_series_equal(s_tft & s_0123, exp)
1480-
else:
1481-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1482-
assert_series_equal(s_tft & s_0123, exp)
1471+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1472+
assert_series_equal(s_tft & s_0123, exp)
14831473

14841474
# s_tft will be all false now because of reindexing like s_0123
1485-
if compat.PY3:
1486-
# unable to sort incompatible object via .union.
1487-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
1488-
with tm.assert_produces_warning(RuntimeWarning):
1489-
assert_series_equal(s_0123 & s_tft, exp)
1490-
else:
1491-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1492-
assert_series_equal(s_0123 & s_tft, exp)
1475+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1476+
assert_series_equal(s_0123 & s_tft, exp)
14931477

14941478
assert_series_equal(s_0123 & False, Series([False] * 4))
14951479
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))

0 commit comments

Comments
 (0)