Skip to content

Commit baef416

Browse files
committed
DEPR: Deprecate sort=None for union and implement sort=True
1 parent b115a6b commit baef416

File tree

3 files changed

+85
-70
lines changed

3 files changed

+85
-70
lines changed

pandas/core/indexes/base.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -2324,9 +2324,10 @@ def _is_compatible_with_other(self, other):
23242324
and is_dtype_equal(self.dtype, other.dtype))
23252325

23262326
def _validate_sort_keyword(self, sort):
2327-
if sort not in [None, False]:
2327+
if sort not in [None, True, False]:
23282328
raise ValueError("The 'sort' keyword only takes the values of "
2329-
"None or False; {0} was passed.".format(sort))
2329+
"None, True or False; {0} was "
2330+
"passed.".format(sort))
23302331

23312332
def union(self, other, sort=None):
23322333
"""
@@ -2350,6 +2351,12 @@ def union(self, other, sort=None):
23502351
3. Some values in `self` or `other` cannot be compared.
23512352
A RuntimeWarning is issued in this case.
23522353
2354+
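.. deprecated:: 0.25.0 Passing ``sort=None`` (the current default) is deprecated; pass ``sort=True`` or ``sort=False`` explicitly.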
2355+
2356+
* True : Sort the result, except when some values in `self`
2357+
or `other` cannot be compared. A RuntimeWarning is issued
2358+
in this case.
2359+
23532360
* False : do not sort the result.
23542361
23552362
.. versionadded:: 0.24.0
@@ -2409,10 +2416,16 @@ def _union(self, other, sort):
24092416
"""
24102417

24112418
if not len(other) or self.equals(other):
2412-
return self._get_reconciled_name_object(other)
2419+
res = self._get_reconciled_name_object(other)
2420+
if sort:
2421+
res = res.sort_values()
2422+
return res
24132423

24142424
if not len(self):
2415-
return other._get_reconciled_name_object(self)
2425+
res = other._get_reconciled_name_object(self)
2426+
if sort:
2427+
res = res.sort_values()
2428+
return res
24162429

24172430
# TODO(EA): setops-refactor, clean all this up
24182431
if is_period_dtype(self) or is_datetime64tz_dtype(self):
@@ -2424,7 +2437,7 @@ def _union(self, other, sort):
24242437
else:
24252438
rvals = other._values
24262439

2427-
if sort is None and self.is_monotonic and other.is_monotonic:
2440+
if sort is not False and self.is_monotonic and other.is_monotonic:
24282441
try:
24292442
result = self._outer_indexer(lvals, rvals)[0]
24302443
except TypeError:
@@ -2446,7 +2459,7 @@ def _union(self, other, sort):
24462459
else:
24472460
result = lvals
24482461

2449-
if sort is None:
2462+
if sort is not False:
24502463
try:
24512464
result = sorting.safe_sort(result)
24522465
except TypeError as e:

pandas/tests/indexes/multi/test_set_ops.py

-10
Original file line numberDiff line numberDiff line change
@@ -358,13 +358,3 @@ def test_union_sort_other_incomparable_sort():
358358
idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']])
359359
with pytest.raises(TypeError, match='Cannot compare'):
360360
idx.union(idx[:1], sort=True)
361-
362-
363-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
364-
'symmetric_difference'])
365-
def test_setops_disallow_true(method):
366-
idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
367-
idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]])
368-
369-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
370-
getattr(idx1, method)(idx2, sort=True)

pandas/tests/indexes/test_base.py

+66-54
Original file line numberDiff line numberDiff line change
@@ -783,32 +783,38 @@ def test_intersection_equal_sort_true(self):
783783
sorted_ = pd.Index(['a', 'b', 'c'])
784784
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
785785

786-
@pytest.mark.parametrize("sort", [None, False])
786+
@pytest.mark.parametrize("sort", [None, True, False])
787787
def test_chained_union(self, sort):
788788
# Chained unions handles names correctly
789789
i1 = Index([1, 2], name='i1')
790790
i2 = Index([5, 6], name='i2')
791791
i3 = Index([3, 4], name='i3')
792-
union = i1.union(i2.union(i3, sort=sort), sort=sort)
793-
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
792+
793+
warning = FutureWarning if sort is None else None
794+
with tm.assert_produces_warning(warning):
795+
union = i1.union(i2.union(i3, sort=sort), sort=sort)
796+
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
794797
tm.assert_index_equal(union, expected)
795798

796799
j1 = Index([1, 2], name='j1')
797800
j2 = Index([], name='j2')
798801
j3 = Index([], name='j3')
799-
union = j1.union(j2.union(j3, sort=sort), sort=sort)
800-
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
802+
with tm.assert_produces_warning(warning):
803+
union = j1.union(j2.union(j3, sort=sort), sort=sort)
804+
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
801805
tm.assert_index_equal(union, expected)
802806

803-
@pytest.mark.parametrize("sort", [None, False])
807+
@pytest.mark.parametrize("sort", [None, True, False])
804808
def test_union(self, sort):
805809
# TODO: Replace with fixture
806810
first = self.strIndex[5:20]
807811
second = self.strIndex[:10]
808812
everything = self.strIndex[:20]
809813

810-
union = first.union(second, sort=sort)
811-
if sort is None:
814+
warning = FutureWarning if sort is None else None
815+
with tm.assert_produces_warning(warning):
816+
union = first.union(second, sort=sort)
817+
if sort is not False:
812818
tm.assert_index_equal(union, everything.sort_values())
813819
assert tm.equalContents(union, everything)
814820

@@ -819,21 +825,14 @@ def test_union_sort_other_special(self, slice_):
819825
idx = pd.Index([1, 0, 2])
820826
# default, sort=None
821827
other = idx[slice_]
822-
tm.assert_index_equal(idx.union(other), idx)
823-
tm.assert_index_equal(other.union(idx), idx)
828+
with tm.assert_produces_warning(FutureWarning):
829+
tm.assert_index_equal(idx.union(other), idx)
830+
tm.assert_index_equal(other.union(idx), idx)
824831

825832
# sort=False
826833
tm.assert_index_equal(idx.union(other, sort=False), idx)
827834

828-
@pytest.mark.xfail(reason="Not implemented")
829-
@pytest.mark.parametrize('slice_', [slice(None), slice(0)])
830-
def test_union_sort_special_true(self, slice_):
831-
# TODO decide on True behaviour
832835
# sort=True
833-
idx = pd.Index([1, 0, 2])
834-
# default, sort=None
835-
other = idx[slice_]
836-
837836
result = idx.union(other, sort=True)
838837
expected = pd.Index([0, 1, 2])
839838
tm.assert_index_equal(result, expected)
@@ -842,31 +841,29 @@ def test_union_sort_other_incomparable(self):
842841
# https://github.com/pandas-dev/pandas/issues/24959
843842
idx = pd.Index([1, pd.Timestamp('2000')])
844843
# default (sort=None)
845-
with tm.assert_produces_warning(RuntimeWarning):
844+
with tm.assert_produces_warning(RuntimeWarning,
845+
raise_on_extra_warnings=False):
846846
result = idx.union(idx[:1])
847-
848847
tm.assert_index_equal(result, idx)
849848

850849
# sort=None
851-
with tm.assert_produces_warning(RuntimeWarning):
850+
with tm.assert_produces_warning(RuntimeWarning,
851+
raise_on_extra_warnings=False):
852852
result = idx.union(idx[:1], sort=None)
853853
tm.assert_index_equal(result, idx)
854854

855+
# sort=True
856+
with tm.assert_produces_warning(RuntimeWarning):
857+
result = idx.union(idx[:1], sort=True)
858+
tm.assert_index_equal(result, idx)
859+
855860
# sort=False
856861
result = idx.union(idx[:1], sort=False)
857862
tm.assert_index_equal(result, idx)
858863

859-
@pytest.mark.xfail(reason="Not implemented")
860-
def test_union_sort_other_incomparable_true(self):
861-
# TODO decide on True behaviour
862-
# sort=True
863-
idx = pd.Index([1, pd.Timestamp('2000')])
864-
with pytest.raises(TypeError, match='.*'):
865-
idx.union(idx[:1], sort=True)
866-
867864
@pytest.mark.parametrize("klass", [
868865
np.array, Series, list])
869-
@pytest.mark.parametrize("sort", [None, False])
866+
@pytest.mark.parametrize("sort", [None, True, False])
870867
def test_union_from_iterables(self, klass, sort):
871868
# GH 10149
872869
# TODO: Replace with fixture
@@ -875,53 +872,71 @@ def test_union_from_iterables(self, klass, sort):
875872
everything = self.strIndex[:20]
876873

877874
case = klass(second.values)
878-
result = first.union(case, sort=sort)
879-
if sort is None:
875+
876+
warning = FutureWarning if sort is None else None
877+
with tm.assert_produces_warning(warning):
878+
result = first.union(case, sort=sort)
879+
880+
if sort is not False:
880881
tm.assert_index_equal(result, everything.sort_values())
881882
assert tm.equalContents(result, everything)
882883

883-
@pytest.mark.parametrize("sort", [None, False])
884+
@pytest.mark.parametrize("sort", [None, True, False])
884885
def test_union_identity(self, sort):
885886
# TODO: replace with fixture
886887
first = self.strIndex[5:20]
887888

888-
union = first.union(first, sort=sort)
889+
warning = FutureWarning if sort is None else None
890+
with tm.assert_produces_warning(warning):
891+
union = first.union(first, sort=sort)
892+
889893
# i.e. identity is not preserved when sort is True
890894
assert (union is first) is (not sort)
891895

892896
# This should no longer be the same object, since [] is not consistent,
893897
# both objects will be recast to dtype('O')
894898
union = first.union([], sort=sort)
899+
with tm.assert_produces_warning(warning):
900+
union = first.union([], sort=sort)
895901
assert (union is first) is (not sort)
896902

897-
union = Index([]).union(first, sort=sort)
903+
with tm.assert_produces_warning(warning):
904+
union = Index([]).union(first, sort=sort)
898905
assert (union is first) is (not sort)
899906

900907
@pytest.mark.parametrize("first_list", [list('ba'), list()])
901908
@pytest.mark.parametrize("second_list", [list('ab'), list()])
902909
@pytest.mark.parametrize("first_name, second_name, expected_name", [
903910
('A', 'B', None), (None, 'B', None), ('A', None, None)])
904-
@pytest.mark.parametrize("sort", [None, False])
911+
@pytest.mark.parametrize("sort", [None, True, False])
905912
def test_union_name_preservation(self, first_list, second_list, first_name,
906913
second_name, expected_name, sort):
907914
first = Index(first_list, name=first_name)
908915
second = Index(second_list, name=second_name)
909-
union = first.union(second, sort=sort)
916+
917+
warning = FutureWarning if sort is None else None
918+
with tm.assert_produces_warning(warning):
919+
union = first.union(second, sort=sort)
910920

911921
vals = set(first_list).union(second_list)
912922

913923
if sort is None and len(first_list) > 0 and len(second_list) > 0:
914924
expected = Index(sorted(vals), name=expected_name)
915925
tm.assert_index_equal(union, expected)
926+
elif sort:
927+
expected = Index(sorted(vals), name=expected_name)
928+
tm.assert_index_equal(union, expected)
916929
else:
917930
expected = Index(vals, name=expected_name)
918931
assert tm.equalContents(union, expected)
919932

920-
@pytest.mark.parametrize("sort", [None, False])
933+
@pytest.mark.parametrize("sort", [None, True, False])
921934
def test_union_dt_as_obj(self, sort):
922935
# TODO: Replace with fixture
923-
firstCat = self.strIndex.union(self.dateIndex)
924-
secondCat = self.strIndex.union(self.strIndex)
936+
warning = FutureWarning if sort is None else None
937+
with tm.assert_produces_warning(warning, check_stacklevel=False):
938+
firstCat = self.strIndex.union(self.dateIndex, sort=sort)
939+
secondCat = self.strIndex.union(self.strIndex, sort=sort)
925940

926941
if self.dateIndex.dtype == np.object_:
927942
appended = np.append(self.strIndex, self.dateIndex)
@@ -934,15 +949,6 @@ def test_union_dt_as_obj(self, sort):
934949
tm.assert_contains_all(self.strIndex, secondCat)
935950
tm.assert_contains_all(self.dateIndex, firstCat)
936951

937-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
938-
'symmetric_difference'])
939-
def test_setops_disallow_true(self, method):
940-
idx1 = pd.Index(['a', 'b'])
941-
idx2 = pd.Index(['b', 'c'])
942-
943-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
944-
getattr(idx1, method)(idx2, sort=True)
945-
946952
def test_map_identity_mapping(self):
947953
# GH 12766
948954
# TODO: replace with fixture
@@ -1712,7 +1718,9 @@ def test_tuple_union_bug(self, method, expected, sort):
17121718
(2, 'B'), (1, 'C'), (2, 'C')],
17131719
dtype=[('num', int), ('let', 'a1')]))
17141720

1715-
result = getattr(index1, method)(index2, sort=sort)
1721+
warning = FutureWarning if method == 'union' else None
1722+
with tm.assert_produces_warning(warning):
1723+
result = getattr(index1, method)(index2, sort=sort)
17161724
assert result.ndim == 1
17171725

17181726
expected = Index(expected)
@@ -1923,12 +1931,14 @@ def test_outer_join_sort(self):
19231931
left_index = Index(np.random.permutation(15))
19241932
right_index = tm.makeDateIndex(10)
19251933

1926-
with tm.assert_produces_warning(RuntimeWarning):
1934+
with tm.assert_produces_warning(RuntimeWarning,
1935+
raise_on_extra_warnings=False):
19271936
result = left_index.join(right_index, how='outer')
19281937

19291938
# right_index in this case because DatetimeIndex has join precedence
19301939
# over Int64Index
1931-
with tm.assert_produces_warning(RuntimeWarning):
1940+
with tm.assert_produces_warning(RuntimeWarning,
1941+
raise_on_extra_warnings=False):
19321942
expected = right_index.astype(object).union(
19331943
left_index.astype(object))
19341944

@@ -2239,7 +2249,8 @@ def test_union_base(self):
22392249
first = index[3:]
22402250
second = index[:5]
22412251

2242-
result = first.union(second)
2252+
with tm.assert_produces_warning(FutureWarning):
2253+
result = first.union(second)
22432254

22442255
expected = Index([0, 1, 2, 'a', 'b', 'c'])
22452256
tm.assert_index_equal(result, expected)
@@ -2252,7 +2263,8 @@ def test_union_different_type_base(self, klass):
22522263
first = index[3:]
22532264
second = index[:5]
22542265

2255-
result = first.union(klass(second.values))
2266+
with tm.assert_produces_warning(FutureWarning):
2267+
result = first.union(klass(second.values))
22562268

22572269
assert tm.equalContents(result, index)
22582270

0 commit comments

Comments
 (0)