Skip to content

Commit b7a09c4

Browse files
toobaz authored and gfyoung committed
BUG: Try to sort result of Index.union rather than guessing sortability
closes pandas-dev#17376
1 parent 5f271eb commit b7a09c4

File tree

4 files changed: +33 -73 lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -861,6 +861,7 @@ Numeric
861861
Indexing
862862
^^^^^^^^
863863

864+
- Bug in the order of the result of ``Index.union()`` when indexes contain tuples (:issue:`17376`)
864865
- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`)
865866
- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`)
866867
- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`)

pandas/core/indexes/base.py

+7 -21
Original file line number | Diff line number | Diff line change
@@ -2338,35 +2338,21 @@ def union(self, other):
23382338
value_set = set(lvals)
23392339
result.extend([x for x in rvals if x not in value_set])
23402340
else:
2341-
indexer = self.get_indexer(other)
2342-
indexer, = (indexer == -1).nonzero()
2343-
2341+
indexer = np.where(self.get_indexer(other) == -1)[0]
23442342
if len(indexer) > 0:
23452343
other_diff = algos.take_nd(rvals, indexer,
23462344
allow_fill=False)
23472345
result = _concat._concat_compat((lvals, other_diff))
23482346

2349-
try:
2350-
lvals[0] < other_diff[0]
2351-
except TypeError as e:
2352-
warnings.warn("%s, sort order is undefined for "
2353-
"incomparable objects" % e, RuntimeWarning,
2354-
stacklevel=3)
2355-
else:
2356-
types = frozenset((self.inferred_type,
2357-
other.inferred_type))
2358-
if not types & _unsortable_types:
2359-
result.sort()
2360-
23612347
else:
23622348
result = lvals
23632349

2364-
try:
2365-
result = np.sort(result)
2366-
except TypeError as e:
2367-
warnings.warn("%s, sort order is undefined for "
2368-
"incomparable objects" % e, RuntimeWarning,
2369-
stacklevel=3)
2350+
try:
2351+
result = sorting.safe_sort(result)
2352+
except TypeError as e:
2353+
warnings.warn("%s, sort order is undefined for "
2354+
"incomparable objects" % e, RuntimeWarning,
2355+
stacklevel=3)
23702356

23712357
# for subclasses
23722358
return self._wrap_union_result(other, result)

pandas/tests/indexes/test_base.py

+20 -31
Original file line number | Diff line number | Diff line change
@@ -785,8 +785,7 @@ def test_union(self):
785785
expected = Index(list('ab'), name='A')
786786
tm.assert_index_equal(union, expected)
787787

788-
with tm.assert_produces_warning(RuntimeWarning):
789-
firstCat = self.strIndex.union(self.dateIndex)
788+
firstCat = self.strIndex.union(self.dateIndex)
790789
secondCat = self.strIndex.union(self.strIndex)
791790

792791
if self.dateIndex.dtype == np.object_:
@@ -1463,19 +1462,19 @@ def test_tuple_union_bug(self):
14631462
(2, 'B'), (1, 'C'), (2, 'C')],
14641463
dtype=[('num', int), ('let', 'a1')])
14651464

1466-
idx1 = pandas.Index(aidx1)
1467-
idx2 = pandas.Index(aidx2)
1465+
idx1 = Index(aidx1)
1466+
idx2 = Index(aidx2)
14681467

1469-
# intersection broken?
1468+
# intersection
14701469
int_idx = idx1.intersection(idx2)
1470+
expected = idx1 # pandas.Index(sorted(set(idx1) & set(idx2)))
14711471
# needs to be 1d like idx1 and idx2
1472-
expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2)))
14731472
assert int_idx.ndim == 1
14741473
tm.assert_index_equal(int_idx, expected)
14751474

1476-
# union broken
1475+
# GH 17376 (union)
14771476
union_idx = idx1.union(idx2)
1478-
expected = idx2
1477+
expected = idx2.sort_values()
14791478
assert union_idx.ndim == 1
14801479
tm.assert_index_equal(union_idx, expected)
14811480

@@ -1665,13 +1664,19 @@ def test_outer_join_sort(self):
16651664
left_idx = Index(np.random.permutation(15))
16661665
right_idx = tm.makeDateIndex(10)
16671666

1668-
with tm.assert_produces_warning(RuntimeWarning):
1667+
if PY3:
1668+
with tm.assert_produces_warning(RuntimeWarning):
1669+
joined = left_idx.join(right_idx, how='outer')
1670+
else:
16691671
joined = left_idx.join(right_idx, how='outer')
16701672

16711673
# right_idx in this case because DatetimeIndex has join precedence over
16721674
# Int64Index
1673-
with tm.assert_produces_warning(RuntimeWarning):
1674-
expected = right_idx.astype(object).union(left_idx.astype(object))
1675+
if PY3:
1676+
with tm.assert_produces_warning(RuntimeWarning):
1677+
expected = right_idx.astype(object).union(left_idx)
1678+
else:
1679+
expected = right_idx.astype(object).union(left_idx)
16751680
tm.assert_index_equal(joined, expected)
16761681

16771682
def test_nan_first_take_datetime(self):
@@ -2060,10 +2065,7 @@ def test_copy_name(self):
20602065
s1 = Series(2, index=first)
20612066
s2 = Series(3, index=second[:-1])
20622067

2063-
warning_type = RuntimeWarning if PY3 else None
2064-
with tm.assert_produces_warning(warning_type):
2065-
# Python 3: Unorderable types
2066-
s3 = s1 * s2
2068+
s3 = s1 * s2
20672069

20682070
assert s3.index.name == 'mario'
20692071

@@ -2096,27 +2098,14 @@ def test_union_base(self):
20962098
first = idx[3:]
20972099
second = idx[:5]
20982100

2099-
if PY3:
2100-
with tm.assert_produces_warning(RuntimeWarning):
2101-
# unorderable types
2102-
result = first.union(second)
2103-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2104-
tm.assert_index_equal(result, expected)
2105-
else:
2106-
result = first.union(second)
2107-
expected = Index(['b', 2, 'c', 0, 'a', 1])
2108-
tm.assert_index_equal(result, expected)
2101+
expected = Index([0, 1, 2, 'a', 'b', 'c'])
2102+
result = first.union(second)
2103+
tm.assert_index_equal(result, expected)
21092104

21102105
# GH 10149
21112106
cases = [klass(second.values)
21122107
for klass in [np.array, Series, list]]
21132108
for case in cases:
2114-
if PY3:
2115-
with tm.assert_produces_warning(RuntimeWarning):
2116-
# unorderable types
2117-
result = first.union(case)
2118-
assert tm.equalContents(result, idx)
2119-
else:
21202109
result = first.union(case)
21212110
assert tm.equalContents(result, idx)
21222111

pandas/tests/series/test_operators.py

+5 -21
Original file line number | Diff line number | Diff line change
@@ -431,11 +431,7 @@ def test_comparison_label_based(self):
431431
assert_series_equal(result, a[a])
432432

433433
for e in [Series(['z'])]:
434-
if compat.PY3:
435-
with tm.assert_produces_warning(RuntimeWarning):
436-
result = a[a | e]
437-
else:
438-
result = a[a | e]
434+
result = a[a | e]
439435
assert_series_equal(result, a[a])
440436

441437
# vs scalars
@@ -1472,24 +1468,12 @@ def test_operators_bitwise(self):
14721468
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])
14731469

14741470
# s_0123 will be all false now because of reindexing like s_tft
1475-
if compat.PY3:
1476-
# unable to sort incompatible object via .union.
1477-
exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
1478-
with tm.assert_produces_warning(RuntimeWarning):
1479-
assert_series_equal(s_tft & s_0123, exp)
1480-
else:
1481-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1482-
assert_series_equal(s_tft & s_0123, exp)
1471+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1472+
assert_series_equal(s_tft & s_0123, exp)
14831473

14841474
# s_tft will be all false now because of reindexing like s_0123
1485-
if compat.PY3:
1486-
# unable to sort incompatible object via .union.
1487-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
1488-
with tm.assert_produces_warning(RuntimeWarning):
1489-
assert_series_equal(s_0123 & s_tft, exp)
1490-
else:
1491-
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1492-
assert_series_equal(s_0123 & s_tft, exp)
1475+
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
1476+
assert_series_equal(s_0123 & s_tft, exp)
14931477

14941478
assert_series_equal(s_0123 & False, Series([False] * 4))
14951479
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))

0 commit comments

Comments
 (0)