Skip to content

Commit f0f7d2b

Browse files
committed
DEPR: Deprecate sort=None for union and implement sort=True
1 parent 4e4f5bd commit f0f7d2b

File tree

3 files changed

+90
-71
lines changed

pandas/core/indexes/base.py

+24-6
Original file line number | Diff line number | Diff line change
@@ -2263,9 +2263,10 @@ def _get_reconciled_name_object(self, other):
22632263
return self
22642264

22652265
def _validate_sort_keyword(self, sort):
2266-
if sort not in [None, False]:
2266+
if sort not in [None, True, False]:
22672267
raise ValueError("The 'sort' keyword only takes the values of "
2268-
"None or False; {0} was passed.".format(sort))
2268+
"None, True or False; {0} was "
2269+
"passed.".format(sort))
22692270

22702271
def union(self, other, sort=None):
22712272
"""
@@ -2284,6 +2285,12 @@ def union(self, other, sort=None):
22842285
3. Some values in `self` or `other` cannot be compared.
22852286
A RuntimeWarning is issued in this case.
22862287
2288+
.. deprecated:: 0.25.0
2289+
2290+
* True : Sort the result, except when some values in `self`
2291+
or `other` cannot be compared. A RuntimeWarning is issued
2292+
in this case.
2293+
22872294
* False : do not sort the result.
22882295
22892296
.. versionadded:: 0.24.0
@@ -2309,11 +2316,22 @@ def union(self, other, sort=None):
23092316
self._assert_can_do_setop(other)
23102317
other = ensure_index(other)
23112318

2319+
if sort is None:
2320+
warnings.warn("sort='None' is deprecated, and will be "
2321+
"removed in a future version.",
2322+
FutureWarning, stacklevel=2)
2323+
23122324
if len(other) == 0 or self.equals(other):
2313-
return self._get_reconciled_name_object(other)
2325+
res = self._get_reconciled_name_object(other)
2326+
if sort:
2327+
res = res.sort_values()
2328+
return res
23142329

23152330
if len(self) == 0:
2316-
return other._get_reconciled_name_object(self)
2331+
res = other._get_reconciled_name_object(self)
2332+
if sort:
2333+
res = res.sort_values()
2334+
return res
23172335

23182336
# TODO: is_dtype_union_equal is a hack around
23192337
# 1. buggy set ops with duplicates (GH #13432)
@@ -2334,7 +2352,7 @@ def union(self, other, sort=None):
23342352
else:
23352353
rvals = other._values
23362354

2337-
if sort is None and self.is_monotonic and other.is_monotonic:
2355+
if sort is not False and self.is_monotonic and other.is_monotonic:
23382356
try:
23392357
result = self._outer_indexer(lvals, rvals)[0]
23402358
except TypeError:
@@ -2356,7 +2374,7 @@ def union(self, other, sort=None):
23562374
else:
23572375
result = lvals
23582376

2359-
if sort is None:
2377+
if sort is not False:
23602378
try:
23612379
result = sorting.safe_sort(result)
23622380
except TypeError as e:

pandas/tests/indexes/multi/test_set_ops.py

-10
Original file line number | Diff line number | Diff line change
@@ -358,13 +358,3 @@ def test_union_sort_other_incomparable_sort():
358358
idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']])
359359
with pytest.raises(TypeError, match='Cannot compare'):
360360
idx.union(idx[:1], sort=True)
361-
362-
363-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
364-
'symmetric_difference'])
365-
def test_setops_disallow_true(method):
366-
idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
367-
idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]])
368-
369-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
370-
getattr(idx1, method)(idx2, sort=True)

pandas/tests/indexes/test_base.py

+66-55
Original file line number | Diff line number | Diff line change
@@ -783,32 +783,38 @@ def test_intersection_equal_sort_true(self):
783783
sorted_ = pd.Index(['a', 'b', 'c'])
784784
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
785785

786-
@pytest.mark.parametrize("sort", [None, False])
786+
@pytest.mark.parametrize("sort", [None, True, False])
787787
def test_chained_union(self, sort):
788788
# Chained unions handles names correctly
789789
i1 = Index([1, 2], name='i1')
790790
i2 = Index([5, 6], name='i2')
791791
i3 = Index([3, 4], name='i3')
792-
union = i1.union(i2.union(i3, sort=sort), sort=sort)
793-
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
792+
793+
warning = FutureWarning if sort is None else None
794+
with tm.assert_produces_warning(warning):
795+
union = i1.union(i2.union(i3, sort=sort), sort=sort)
796+
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
794797
tm.assert_index_equal(union, expected)
795798

796799
j1 = Index([1, 2], name='j1')
797800
j2 = Index([], name='j2')
798801
j3 = Index([], name='j3')
799-
union = j1.union(j2.union(j3, sort=sort), sort=sort)
800-
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
802+
with tm.assert_produces_warning(warning):
803+
union = j1.union(j2.union(j3, sort=sort), sort=sort)
804+
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
801805
tm.assert_index_equal(union, expected)
802806

803-
@pytest.mark.parametrize("sort", [None, False])
807+
@pytest.mark.parametrize("sort", [None, True, False])
804808
def test_union(self, sort):
805809
# TODO: Replace with fixture
806810
first = self.strIndex[5:20]
807811
second = self.strIndex[:10]
808812
everything = self.strIndex[:20]
809813

810-
union = first.union(second, sort=sort)
811-
if sort is None:
814+
warning = FutureWarning if sort is None else None
815+
with tm.assert_produces_warning(warning):
816+
union = first.union(second, sort=sort)
817+
if sort is not False:
812818
tm.assert_index_equal(union, everything.sort_values())
813819
assert tm.equalContents(union, everything)
814820

@@ -819,21 +825,14 @@ def test_union_sort_other_special(self, slice_):
819825
idx = pd.Index([1, 0, 2])
820826
# default, sort=None
821827
other = idx[slice_]
822-
tm.assert_index_equal(idx.union(other), idx)
823-
tm.assert_index_equal(other.union(idx), idx)
828+
with tm.assert_produces_warning(FutureWarning):
829+
tm.assert_index_equal(idx.union(other), idx)
830+
tm.assert_index_equal(other.union(idx), idx)
824831

825832
# sort=False
826833
tm.assert_index_equal(idx.union(other, sort=False), idx)
827834

828-
@pytest.mark.xfail(reason="Not implemented")
829-
@pytest.mark.parametrize('slice_', [slice(None), slice(0)])
830-
def test_union_sort_special_true(self, slice_):
831-
# TODO decide on True behaviour
832835
# sort=True
833-
idx = pd.Index([1, 0, 2])
834-
# default, sort=None
835-
other = idx[slice_]
836-
837836
result = idx.union(other, sort=True)
838837
expected = pd.Index([0, 1, 2])
839838
tm.assert_index_equal(result, expected)
@@ -842,31 +841,29 @@ def test_union_sort_other_incomparable(self):
842841
# https://github.com/pandas-dev/pandas/issues/24959
843842
idx = pd.Index([1, pd.Timestamp('2000')])
844843
# default (sort=None)
845-
with tm.assert_produces_warning(RuntimeWarning):
844+
with tm.assert_produces_warning(RuntimeWarning,
845+
raise_on_extra_warnings=False):
846846
result = idx.union(idx[:1])
847-
848847
tm.assert_index_equal(result, idx)
849848

850849
# sort=None
851-
with tm.assert_produces_warning(RuntimeWarning):
850+
with tm.assert_produces_warning(RuntimeWarning,
851+
raise_on_extra_warnings=False):
852852
result = idx.union(idx[:1], sort=None)
853853
tm.assert_index_equal(result, idx)
854854

855+
# sort=True
856+
with tm.assert_produces_warning(RuntimeWarning):
857+
result = idx.union(idx[:1], sort=True)
858+
tm.assert_index_equal(result, idx)
859+
855860
# sort=False
856861
result = idx.union(idx[:1], sort=False)
857862
tm.assert_index_equal(result, idx)
858863

859-
@pytest.mark.xfail(reason="Not implemented")
860-
def test_union_sort_other_incomparable_true(self):
861-
# TODO decide on True behaviour
862-
# sort=True
863-
idx = pd.Index([1, pd.Timestamp('2000')])
864-
with pytest.raises(TypeError, match='.*'):
865-
idx.union(idx[:1], sort=True)
866-
867864
@pytest.mark.parametrize("klass", [
868865
np.array, Series, list])
869-
@pytest.mark.parametrize("sort", [None, False])
866+
@pytest.mark.parametrize("sort", [None, True, False])
870867
def test_union_from_iterables(self, klass, sort):
871868
# GH 10149
872869
# TODO: Replace with fixture
@@ -875,51 +872,68 @@ def test_union_from_iterables(self, klass, sort):
875872
everything = self.strIndex[:20]
876873

877874
case = klass(second.values)
878-
result = first.union(case, sort=sort)
879-
if sort is None:
875+
876+
warning = FutureWarning if sort is None else None
877+
with tm.assert_produces_warning(warning):
878+
result = first.union(case, sort=sort)
879+
880+
if sort is not False:
880881
tm.assert_index_equal(result, everything.sort_values())
881882
assert tm.equalContents(result, everything)
882883

883-
@pytest.mark.parametrize("sort", [None, False])
884+
@pytest.mark.parametrize("sort", [None, True, False])
884885
def test_union_identity(self, sort):
885886
# TODO: replace with fixture
886887
first = self.strIndex[5:20]
887888

888-
union = first.union(first, sort=sort)
889+
warning = FutureWarning if sort is None else None
890+
with tm.assert_produces_warning(warning):
891+
union = first.union(first, sort=sort)
892+
889893
# i.e. identity is not preserved when sort is True
890894
assert (union is first) is (not sort)
891895

892-
union = first.union([], sort=sort)
896+
with tm.assert_produces_warning(warning):
897+
union = first.union([], sort=sort)
893898
assert (union is first) is (not sort)
894899

895-
union = Index([]).union(first, sort=sort)
900+
with tm.assert_produces_warning(warning):
901+
union = Index([]).union(first, sort=sort)
896902
assert (union is first) is (not sort)
897903

898904
@pytest.mark.parametrize("first_list", [list('ba'), list()])
899905
@pytest.mark.parametrize("second_list", [list('ab'), list()])
900906
@pytest.mark.parametrize("first_name, second_name, expected_name", [
901907
('A', 'B', None), (None, 'B', None), ('A', None, None)])
902-
@pytest.mark.parametrize("sort", [None, False])
908+
@pytest.mark.parametrize("sort", [None, True, False])
903909
def test_union_name_preservation(self, first_list, second_list, first_name,
904910
second_name, expected_name, sort):
905911
first = Index(first_list, name=first_name)
906912
second = Index(second_list, name=second_name)
907-
union = first.union(second, sort=sort)
913+
914+
warning = FutureWarning if sort is None else None
915+
with tm.assert_produces_warning(warning):
916+
union = first.union(second, sort=sort)
908917

909918
vals = set(first_list).union(second_list)
910919

911920
if sort is None and len(first_list) > 0 and len(second_list) > 0:
912921
expected = Index(sorted(vals), name=expected_name)
913922
tm.assert_index_equal(union, expected)
923+
elif sort:
924+
expected = Index(sorted(vals), name=expected_name)
925+
tm.assert_index_equal(union, expected)
914926
else:
915927
expected = Index(vals, name=expected_name)
916928
assert tm.equalContents(union, expected)
917929

918-
@pytest.mark.parametrize("sort", [None, False])
930+
@pytest.mark.parametrize("sort", [None, True, False])
919931
def test_union_dt_as_obj(self, sort):
920932
# TODO: Replace with fixture
921-
firstCat = self.strIndex.union(self.dateIndex)
922-
secondCat = self.strIndex.union(self.strIndex)
933+
warning = FutureWarning if sort is None else None
934+
with tm.assert_produces_warning(warning, check_stacklevel=False):
935+
firstCat = self.strIndex.union(self.dateIndex, sort=sort)
936+
secondCat = self.strIndex.union(self.strIndex, sort=sort)
923937

924938
if self.dateIndex.dtype == np.object_:
925939
appended = np.append(self.strIndex, self.dateIndex)
@@ -932,15 +946,6 @@ def test_union_dt_as_obj(self, sort):
932946
tm.assert_contains_all(self.strIndex, secondCat)
933947
tm.assert_contains_all(self.dateIndex, firstCat)
934948

935-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
936-
'symmetric_difference'])
937-
def test_setops_disallow_true(self, method):
938-
idx1 = pd.Index(['a', 'b'])
939-
idx2 = pd.Index(['b', 'c'])
940-
941-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
942-
getattr(idx1, method)(idx2, sort=True)
943-
944949
def test_map_identity_mapping(self):
945950
# GH 12766
946951
# TODO: replace with fixture
@@ -1707,7 +1712,9 @@ def test_tuple_union_bug(self, method, expected, sort):
17071712
(2, 'B'), (1, 'C'), (2, 'C')],
17081713
dtype=[('num', int), ('let', 'a1')]))
17091714

1710-
result = getattr(index1, method)(index2, sort=sort)
1715+
warning = FutureWarning if method == 'union' else None
1716+
with tm.assert_produces_warning(warning):
1717+
result = getattr(index1, method)(index2, sort=sort)
17111718
assert result.ndim == 1
17121719

17131720
expected = Index(expected)
@@ -1917,12 +1924,14 @@ def test_outer_join_sort(self):
19171924
left_index = Index(np.random.permutation(15))
19181925
right_index = tm.makeDateIndex(10)
19191926

1920-
with tm.assert_produces_warning(RuntimeWarning):
1927+
with tm.assert_produces_warning(RuntimeWarning,
1928+
raise_on_extra_warnings=False):
19211929
result = left_index.join(right_index, how='outer')
19221930

19231931
# right_index in this case because DatetimeIndex has join precedence
19241932
# over Int64Index
1925-
with tm.assert_produces_warning(RuntimeWarning):
1933+
with tm.assert_produces_warning(RuntimeWarning,
1934+
raise_on_extra_warnings=False):
19261935
expected = right_index.astype(object).union(
19271936
left_index.astype(object))
19281937

@@ -2233,7 +2242,8 @@ def test_union_base(self):
22332242
first = index[3:]
22342243
second = index[:5]
22352244

2236-
result = first.union(second)
2245+
with tm.assert_produces_warning(FutureWarning):
2246+
result = first.union(second)
22372247

22382248
expected = Index([0, 1, 2, 'a', 'b', 'c'])
22392249
tm.assert_index_equal(result, expected)
@@ -2246,7 +2256,8 @@ def test_union_different_type_base(self, klass):
22462256
first = index[3:]
22472257
second = index[:5]
22482258

2249-
result = first.union(klass(second.values))
2259+
with tm.assert_produces_warning(FutureWarning):
2260+
result = first.union(klass(second.values))
22502261

22512262
assert tm.equalContents(result, index)
22522263

0 commit comments

Comments
 (0)