Skip to content

Commit 923b4d2

Browse files
committed
DEPR: Deprecate sort=None for union and implement sort=True
1 parent 0610a60 commit 923b4d2

File tree

3 files changed

+90
-71
lines changed

3 files changed

+90
-71
lines changed

pandas/core/indexes/base.py

+24-6
Original file line numberDiff line numberDiff line change
@@ -2247,9 +2247,10 @@ def _get_reconciled_name_object(self, other):
22472247
return self
22482248

22492249
def _validate_sort_keyword(self, sort):
2250-
if sort not in [None, False]:
2250+
if sort not in [None, True, False]:
22512251
raise ValueError("The 'sort' keyword only takes the values of "
2252-
"None or False; {0} was passed.".format(sort))
2252+
"None, True or False; {0} was "
2253+
"passed.".format(sort))
22532254

22542255
def union(self, other, sort=None):
22552256
"""
@@ -2268,6 +2269,12 @@ def union(self, other, sort=None):
22682269
3. Some values in `self` or `other` cannot be compared.
22692270
A RuntimeWarning is issued in this case.
22702271
2272+
.. deprecated:: 0.25.0
2273+
2274+
* True : Sort the result, except when some values in `self`
2275+
or `other` cannot be compared. A RuntimeWarning is issued
2276+
in this case.
2277+
22712278
* False : do not sort the result.
22722279
22732280
.. versionadded:: 0.24.0
@@ -2293,11 +2300,22 @@ def union(self, other, sort=None):
22932300
self._assert_can_do_setop(other)
22942301
other = ensure_index(other)
22952302

2303+
if sort is None:
2304+
warnings.warn("sort='None' is deprecated, and will be "
2305+
"removed in a future version.",
2306+
FutureWarning, stacklevel=2)
2307+
22962308
if len(other) == 0 or self.equals(other):
2297-
return self._get_reconciled_name_object(other)
2309+
res = self._get_reconciled_name_object(other)
2310+
if sort:
2311+
res = res.sort_values()
2312+
return res
22982313

22992314
if len(self) == 0:
2300-
return other._get_reconciled_name_object(self)
2315+
res = other._get_reconciled_name_object(self)
2316+
if sort:
2317+
res = res.sort_values()
2318+
return res
23012319

23022320
# TODO: is_dtype_union_equal is a hack around
23032321
# 1. buggy set ops with duplicates (GH #13432)
@@ -2318,7 +2336,7 @@ def union(self, other, sort=None):
23182336
else:
23192337
rvals = other._values
23202338

2321-
if sort is None and self.is_monotonic and other.is_monotonic:
2339+
if sort is not False and self.is_monotonic and other.is_monotonic:
23222340
try:
23232341
result = self._outer_indexer(lvals, rvals)[0]
23242342
except TypeError:
@@ -2340,7 +2358,7 @@ def union(self, other, sort=None):
23402358
else:
23412359
result = lvals
23422360

2343-
if sort is None:
2361+
if sort is not False:
23442362
try:
23452363
result = sorting.safe_sort(result)
23462364
except TypeError as e:

pandas/tests/indexes/multi/test_set_ops.py

-10
Original file line numberDiff line numberDiff line change
@@ -360,13 +360,3 @@ def test_union_sort_other_incomparable_sort():
360360
idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']])
361361
with pytest.raises(TypeError, match='Cannot compare'):
362362
idx.union(idx[:1], sort=True)
363-
364-
365-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
366-
'symmetric_difference'])
367-
def test_setops_disallow_true(method):
368-
idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
369-
idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]])
370-
371-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
372-
getattr(idx1, method)(idx2, sort=True)

pandas/tests/indexes/test_base.py

+66-55
Original file line numberDiff line numberDiff line change
@@ -790,32 +790,38 @@ def test_intersection_equal_sort_true(self):
790790
sorted_ = pd.Index(['a', 'b', 'c'])
791791
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
792792

793-
@pytest.mark.parametrize("sort", [None, False])
793+
@pytest.mark.parametrize("sort", [None, True, False])
794794
def test_chained_union(self, sort):
795795
# Chained unions handles names correctly
796796
i1 = Index([1, 2], name='i1')
797797
i2 = Index([5, 6], name='i2')
798798
i3 = Index([3, 4], name='i3')
799-
union = i1.union(i2.union(i3, sort=sort), sort=sort)
800-
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
799+
800+
warning = FutureWarning if sort is None else None
801+
with tm.assert_produces_warning(warning):
802+
union = i1.union(i2.union(i3, sort=sort), sort=sort)
803+
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
801804
tm.assert_index_equal(union, expected)
802805

803806
j1 = Index([1, 2], name='j1')
804807
j2 = Index([], name='j2')
805808
j3 = Index([], name='j3')
806-
union = j1.union(j2.union(j3, sort=sort), sort=sort)
807-
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
809+
with tm.assert_produces_warning(warning):
810+
union = j1.union(j2.union(j3, sort=sort), sort=sort)
811+
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
808812
tm.assert_index_equal(union, expected)
809813

810-
@pytest.mark.parametrize("sort", [None, False])
814+
@pytest.mark.parametrize("sort", [None, True, False])
811815
def test_union(self, sort):
812816
# TODO: Replace with fixture
813817
first = self.strIndex[5:20]
814818
second = self.strIndex[:10]
815819
everything = self.strIndex[:20]
816820

817-
union = first.union(second, sort=sort)
818-
if sort is None:
821+
warning = FutureWarning if sort is None else None
822+
with tm.assert_produces_warning(warning):
823+
union = first.union(second, sort=sort)
824+
if sort is not False:
819825
tm.assert_index_equal(union, everything.sort_values())
820826
assert tm.equalContents(union, everything)
821827

@@ -826,21 +832,14 @@ def test_union_sort_other_special(self, slice_):
826832
idx = pd.Index([1, 0, 2])
827833
# default, sort=None
828834
other = idx[slice_]
829-
tm.assert_index_equal(idx.union(other), idx)
830-
tm.assert_index_equal(other.union(idx), idx)
835+
with tm.assert_produces_warning(FutureWarning):
836+
tm.assert_index_equal(idx.union(other), idx)
837+
tm.assert_index_equal(other.union(idx), idx)
831838

832839
# sort=False
833840
tm.assert_index_equal(idx.union(other, sort=False), idx)
834841

835-
@pytest.mark.xfail(reason="Not implemented")
836-
@pytest.mark.parametrize('slice_', [slice(None), slice(0)])
837-
def test_union_sort_special_true(self, slice_):
838-
# TODO decide on True behaviour
839842
# sort=True
840-
idx = pd.Index([1, 0, 2])
841-
# default, sort=None
842-
other = idx[slice_]
843-
844843
result = idx.union(other, sort=True)
845844
expected = pd.Index([0, 1, 2])
846845
tm.assert_index_equal(result, expected)
@@ -849,31 +848,29 @@ def test_union_sort_other_incomparable(self):
849848
# https://github.com/pandas-dev/pandas/issues/24959
850849
idx = pd.Index([1, pd.Timestamp('2000')])
851850
# default (sort=None)
852-
with tm.assert_produces_warning(RuntimeWarning):
851+
with tm.assert_produces_warning(RuntimeWarning,
852+
raise_on_extra_warnings=False):
853853
result = idx.union(idx[:1])
854-
855854
tm.assert_index_equal(result, idx)
856855

857856
# sort=None
858-
with tm.assert_produces_warning(RuntimeWarning):
857+
with tm.assert_produces_warning(RuntimeWarning,
858+
raise_on_extra_warnings=False):
859859
result = idx.union(idx[:1], sort=None)
860860
tm.assert_index_equal(result, idx)
861861

862+
# sort=True
863+
with tm.assert_produces_warning(RuntimeWarning):
864+
result = idx.union(idx[:1], sort=True)
865+
tm.assert_index_equal(result, idx)
866+
862867
# sort=False
863868
result = idx.union(idx[:1], sort=False)
864869
tm.assert_index_equal(result, idx)
865870

866-
@pytest.mark.xfail(reason="Not implemented")
867-
def test_union_sort_other_incomparable_true(self):
868-
# TODO decide on True behaviour
869-
# sort=True
870-
idx = pd.Index([1, pd.Timestamp('2000')])
871-
with pytest.raises(TypeError, match='.*'):
872-
idx.union(idx[:1], sort=True)
873-
874871
@pytest.mark.parametrize("klass", [
875872
np.array, Series, list])
876-
@pytest.mark.parametrize("sort", [None, False])
873+
@pytest.mark.parametrize("sort", [None, True, False])
877874
def test_union_from_iterables(self, klass, sort):
878875
# GH 10149
879876
# TODO: Replace with fixture
@@ -882,51 +879,68 @@ def test_union_from_iterables(self, klass, sort):
882879
everything = self.strIndex[:20]
883880

884881
case = klass(second.values)
885-
result = first.union(case, sort=sort)
886-
if sort is None:
882+
883+
warning = FutureWarning if sort is None else None
884+
with tm.assert_produces_warning(warning):
885+
result = first.union(case, sort=sort)
886+
887+
if sort is not False:
887888
tm.assert_index_equal(result, everything.sort_values())
888889
assert tm.equalContents(result, everything)
889890

890-
@pytest.mark.parametrize("sort", [None, False])
891+
@pytest.mark.parametrize("sort", [None, True, False])
891892
def test_union_identity(self, sort):
892893
# TODO: replace with fixture
893894
first = self.strIndex[5:20]
894895

895-
union = first.union(first, sort=sort)
896+
warning = FutureWarning if sort is None else None
897+
with tm.assert_produces_warning(warning):
898+
union = first.union(first, sort=sort)
899+
896900
# i.e. identity is not preserved when sort is True
897901
assert (union is first) is (not sort)
898902

899-
union = first.union([], sort=sort)
903+
with tm.assert_produces_warning(warning):
904+
union = first.union([], sort=sort)
900905
assert (union is first) is (not sort)
901906

902-
union = Index([]).union(first, sort=sort)
907+
with tm.assert_produces_warning(warning):
908+
union = Index([]).union(first, sort=sort)
903909
assert (union is first) is (not sort)
904910

905911
@pytest.mark.parametrize("first_list", [list('ba'), list()])
906912
@pytest.mark.parametrize("second_list", [list('ab'), list()])
907913
@pytest.mark.parametrize("first_name, second_name, expected_name", [
908914
('A', 'B', None), (None, 'B', None), ('A', None, None)])
909-
@pytest.mark.parametrize("sort", [None, False])
915+
@pytest.mark.parametrize("sort", [None, True, False])
910916
def test_union_name_preservation(self, first_list, second_list, first_name,
911917
second_name, expected_name, sort):
912918
first = Index(first_list, name=first_name)
913919
second = Index(second_list, name=second_name)
914-
union = first.union(second, sort=sort)
920+
921+
warning = FutureWarning if sort is None else None
922+
with tm.assert_produces_warning(warning):
923+
union = first.union(second, sort=sort)
915924

916925
vals = set(first_list).union(second_list)
917926

918927
if sort is None and len(first_list) > 0 and len(second_list) > 0:
919928
expected = Index(sorted(vals), name=expected_name)
920929
tm.assert_index_equal(union, expected)
930+
elif sort:
931+
expected = Index(sorted(vals), name=expected_name)
932+
tm.assert_index_equal(union, expected)
921933
else:
922934
expected = Index(vals, name=expected_name)
923935
assert tm.equalContents(union, expected)
924936

925-
@pytest.mark.parametrize("sort", [None, False])
937+
@pytest.mark.parametrize("sort", [None, True, False])
926938
def test_union_dt_as_obj(self, sort):
927939
# TODO: Replace with fixture
928-
firstCat = self.strIndex.union(self.dateIndex)
929-
secondCat = self.strIndex.union(self.strIndex)
940+
warning = FutureWarning if sort is None else None
941+
with tm.assert_produces_warning(warning, check_stacklevel=False):
942+
firstCat = self.strIndex.union(self.dateIndex, sort=sort)
943+
secondCat = self.strIndex.union(self.strIndex, sort=sort)
930944

931945
if self.dateIndex.dtype == np.object_:
932946
appended = np.append(self.strIndex, self.dateIndex)
@@ -939,15 +953,6 @@ def test_union_dt_as_obj(self, sort):
939953
tm.assert_contains_all(self.strIndex, secondCat)
940954
tm.assert_contains_all(self.dateIndex, firstCat)
941955

942-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
943-
'symmetric_difference'])
944-
def test_setops_disallow_true(self, method):
945-
idx1 = pd.Index(['a', 'b'])
946-
idx2 = pd.Index(['b', 'c'])
947-
948-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
949-
getattr(idx1, method)(idx2, sort=True)
950-
951956
def test_map_identity_mapping(self):
952957
# GH 12766
953958
# TODO: replace with fixture
@@ -1714,7 +1719,9 @@ def test_tuple_union_bug(self, method, expected, sort):
17141719
(2, 'B'), (1, 'C'), (2, 'C')],
17151720
dtype=[('num', int), ('let', 'a1')]))
17161721

1717-
result = getattr(index1, method)(index2, sort=sort)
1722+
warning = FutureWarning if method == 'union' else None
1723+
with tm.assert_produces_warning(warning):
1724+
result = getattr(index1, method)(index2, sort=sort)
17181725
assert result.ndim == 1
17191726

17201727
expected = Index(expected)
@@ -1924,12 +1931,14 @@ def test_outer_join_sort(self):
19241931
left_index = Index(np.random.permutation(15))
19251932
right_index = tm.makeDateIndex(10)
19261933

1927-
with tm.assert_produces_warning(RuntimeWarning):
1934+
with tm.assert_produces_warning(RuntimeWarning,
1935+
raise_on_extra_warnings=False):
19281936
result = left_index.join(right_index, how='outer')
19291937

19301938
# right_index in this case because DatetimeIndex has join precedence
19311939
# over Int64Index
1932-
with tm.assert_produces_warning(RuntimeWarning):
1940+
with tm.assert_produces_warning(RuntimeWarning,
1941+
raise_on_extra_warnings=False):
19331942
expected = right_index.astype(object).union(
19341943
left_index.astype(object))
19351944

@@ -2240,7 +2249,8 @@ def test_union_base(self):
22402249
first = index[3:]
22412250
second = index[:5]
22422251

2243-
result = first.union(second)
2252+
with tm.assert_produces_warning(FutureWarning):
2253+
result = first.union(second)
22442254

22452255
expected = Index([0, 1, 2, 'a', 'b', 'c'])
22462256
tm.assert_index_equal(result, expected)
@@ -2253,7 +2263,8 @@ def test_union_different_type_base(self, klass):
22532263
first = index[3:]
22542264
second = index[:5]
22552265

2256-
result = first.union(klass(second.values))
2266+
with tm.assert_produces_warning(FutureWarning):
2267+
result = first.union(klass(second.values))
22572268

22582269
assert tm.equalContents(result, index)
22592270

0 commit comments

Comments
 (0)