Skip to content

Commit 5d14833

Browse files
committed
DEPR: Deprecate sort=None for union and implement sort=True
1 parent 3937fbc commit 5d14833

File tree

3 files changed

+85
-70
lines changed

3 files changed

+85
-70
lines changed

pandas/core/indexes/base.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -2304,9 +2304,10 @@ def _is_compatible_with_other(self, other):
23042304
and is_dtype_equal(self.dtype, other.dtype))
23052305

23062306
def _validate_sort_keyword(self, sort):
2307-
if sort not in [None, False]:
2307+
if sort not in [None, True, False]:
23082308
raise ValueError("The 'sort' keyword only takes the values of "
2309-
"None or False; {0} was passed.".format(sort))
2309+
"None, True or False; {0} was "
2310+
"passed.".format(sort))
23102311

23112312
def union(self, other, sort=None):
23122313
"""
@@ -2330,6 +2331,12 @@ def union(self, other, sort=None):
23302331
3. Some values in `self` or `other` cannot be compared.
23312332
A RuntimeWarning is issued in this case.
23322333
2334+
.. deprecated:: 0.25.0
2335+
2336+
* True : Sort the result, except when some values in `self`
2337+
or `other` cannot be compared. A RuntimeWarning is issued
2338+
in this case.
2339+
23332340
* False : do not sort the result.
23342341
23352342
.. versionadded:: 0.24.0
@@ -2389,10 +2396,16 @@ def _union(self, other, sort):
23892396
"""
23902397

23912398
if not len(other) or self.equals(other):
2392-
return self._get_reconciled_name_object(other)
2399+
res = self._get_reconciled_name_object(other)
2400+
if sort:
2401+
res = res.sort_values()
2402+
return res
23932403

23942404
if not len(self):
2395-
return other._get_reconciled_name_object(self)
2405+
res = other._get_reconciled_name_object(self)
2406+
if sort:
2407+
res = res.sort_values()
2408+
return res
23962409

23972410
# TODO(EA): setops-refactor, clean all this up
23982411
if is_period_dtype(self) or is_datetime64tz_dtype(self):
@@ -2404,7 +2417,7 @@ def _union(self, other, sort):
24042417
else:
24052418
rvals = other._values
24062419

2407-
if sort is None and self.is_monotonic and other.is_monotonic:
2420+
if sort is not False and self.is_monotonic and other.is_monotonic:
24082421
try:
24092422
result = self._outer_indexer(lvals, rvals)[0]
24102423
except TypeError:
@@ -2426,7 +2439,7 @@ def _union(self, other, sort):
24262439
else:
24272440
result = lvals
24282441

2429-
if sort is None:
2442+
if sort is not False:
24302443
try:
24312444
result = sorting.safe_sort(result)
24322445
except TypeError as e:

pandas/tests/indexes/multi/test_set_ops.py

-10
Original file line numberDiff line numberDiff line change
@@ -358,13 +358,3 @@ def test_union_sort_other_incomparable_sort():
358358
idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']])
359359
with pytest.raises(TypeError, match='Cannot compare'):
360360
idx.union(idx[:1], sort=True)
361-
362-
363-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
364-
'symmetric_difference'])
365-
def test_setops_disallow_true(method):
366-
idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
367-
idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]])
368-
369-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
370-
getattr(idx1, method)(idx2, sort=True)

pandas/tests/indexes/test_base.py

+66-54
Original file line numberDiff line numberDiff line change
@@ -783,32 +783,38 @@ def test_intersection_equal_sort_true(self):
783783
sorted_ = pd.Index(['a', 'b', 'c'])
784784
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
785785

786-
@pytest.mark.parametrize("sort", [None, False])
786+
@pytest.mark.parametrize("sort", [None, True, False])
787787
def test_chained_union(self, sort):
788788
# Chained unions handles names correctly
789789
i1 = Index([1, 2], name='i1')
790790
i2 = Index([5, 6], name='i2')
791791
i3 = Index([3, 4], name='i3')
792-
union = i1.union(i2.union(i3, sort=sort), sort=sort)
793-
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
792+
793+
warning = FutureWarning if sort is None else None
794+
with tm.assert_produces_warning(warning):
795+
union = i1.union(i2.union(i3, sort=sort), sort=sort)
796+
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
794797
tm.assert_index_equal(union, expected)
795798

796799
j1 = Index([1, 2], name='j1')
797800
j2 = Index([], name='j2')
798801
j3 = Index([], name='j3')
799-
union = j1.union(j2.union(j3, sort=sort), sort=sort)
800-
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
802+
with tm.assert_produces_warning(warning):
803+
union = j1.union(j2.union(j3, sort=sort), sort=sort)
804+
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
801805
tm.assert_index_equal(union, expected)
802806

803-
@pytest.mark.parametrize("sort", [None, False])
807+
@pytest.mark.parametrize("sort", [None, True, False])
804808
def test_union(self, sort):
805809
# TODO: Replace with fixture
806810
first = self.strIndex[5:20]
807811
second = self.strIndex[:10]
808812
everything = self.strIndex[:20]
809813

810-
union = first.union(second, sort=sort)
811-
if sort is None:
814+
warning = FutureWarning if sort is None else None
815+
with tm.assert_produces_warning(warning):
816+
union = first.union(second, sort=sort)
817+
if sort is not False:
812818
tm.assert_index_equal(union, everything.sort_values())
813819
assert tm.equalContents(union, everything)
814820

@@ -819,21 +825,14 @@ def test_union_sort_other_special(self, slice_):
819825
idx = pd.Index([1, 0, 2])
820826
# default, sort=None
821827
other = idx[slice_]
822-
tm.assert_index_equal(idx.union(other), idx)
823-
tm.assert_index_equal(other.union(idx), idx)
828+
with tm.assert_produces_warning(FutureWarning):
829+
tm.assert_index_equal(idx.union(other), idx)
830+
tm.assert_index_equal(other.union(idx), idx)
824831

825832
# sort=False
826833
tm.assert_index_equal(idx.union(other, sort=False), idx)
827834

828-
@pytest.mark.xfail(reason="Not implemented")
829-
@pytest.mark.parametrize('slice_', [slice(None), slice(0)])
830-
def test_union_sort_special_true(self, slice_):
831-
# TODO decide on True behaviour
832835
# sort=True
833-
idx = pd.Index([1, 0, 2])
834-
# default, sort=None
835-
other = idx[slice_]
836-
837836
result = idx.union(other, sort=True)
838837
expected = pd.Index([0, 1, 2])
839838
tm.assert_index_equal(result, expected)
@@ -842,31 +841,29 @@ def test_union_sort_other_incomparable(self):
842841
# https://github.com/pandas-dev/pandas/issues/24959
843842
idx = pd.Index([1, pd.Timestamp('2000')])
844843
# default (sort=None)
845-
with tm.assert_produces_warning(RuntimeWarning):
844+
with tm.assert_produces_warning(RuntimeWarning,
845+
raise_on_extra_warnings=False):
846846
result = idx.union(idx[:1])
847-
848847
tm.assert_index_equal(result, idx)
849848

850849
# sort=None
851-
with tm.assert_produces_warning(RuntimeWarning):
850+
with tm.assert_produces_warning(RuntimeWarning,
851+
raise_on_extra_warnings=False):
852852
result = idx.union(idx[:1], sort=None)
853853
tm.assert_index_equal(result, idx)
854854

855+
# sort=True
856+
with tm.assert_produces_warning(RuntimeWarning):
857+
result = idx.union(idx[:1], sort=True)
858+
tm.assert_index_equal(result, idx)
859+
855860
# sort=False
856861
result = idx.union(idx[:1], sort=False)
857862
tm.assert_index_equal(result, idx)
858863

859-
@pytest.mark.xfail(reason="Not implemented")
860-
def test_union_sort_other_incomparable_true(self):
861-
# TODO decide on True behaviour
862-
# sort=True
863-
idx = pd.Index([1, pd.Timestamp('2000')])
864-
with pytest.raises(TypeError, match='.*'):
865-
idx.union(idx[:1], sort=True)
866-
867864
@pytest.mark.parametrize("klass", [
868865
np.array, Series, list])
869-
@pytest.mark.parametrize("sort", [None, False])
866+
@pytest.mark.parametrize("sort", [None, True, False])
870867
def test_union_from_iterables(self, klass, sort):
871868
# GH 10149
872869
# TODO: Replace with fixture
@@ -875,53 +872,71 @@ def test_union_from_iterables(self, klass, sort):
875872
everything = self.strIndex[:20]
876873

877874
case = klass(second.values)
878-
result = first.union(case, sort=sort)
879-
if sort is None:
875+
876+
warning = FutureWarning if sort is None else None
877+
with tm.assert_produces_warning(warning):
878+
result = first.union(case, sort=sort)
879+
880+
if sort is not False:
880881
tm.assert_index_equal(result, everything.sort_values())
881882
assert tm.equalContents(result, everything)
882883

883-
@pytest.mark.parametrize("sort", [None, False])
884+
@pytest.mark.parametrize("sort", [None, True, False])
884885
def test_union_identity(self, sort):
885886
# TODO: replace with fixture
886887
first = self.strIndex[5:20]
887888

888-
union = first.union(first, sort=sort)
889+
warning = FutureWarning if sort is None else None
890+
with tm.assert_produces_warning(warning):
891+
union = first.union(first, sort=sort)
892+
889893
# i.e. identity is not preserved when sort is True
890894
assert (union is first) is (not sort)
891895

892896
# This should no longer be the same object, since [] is not consistent,
893897
# both objects will be recast to dtype('O')
894898
union = first.union([], sort=sort)
899+
with tm.assert_produces_warning(warning):
900+
union = first.union([], sort=sort)
895901
assert (union is first) is (not sort)
896902

897-
union = Index([]).union(first, sort=sort)
903+
with tm.assert_produces_warning(warning):
904+
union = Index([]).union(first, sort=sort)
898905
assert (union is first) is (not sort)
899906

900907
@pytest.mark.parametrize("first_list", [list('ba'), list()])
901908
@pytest.mark.parametrize("second_list", [list('ab'), list()])
902909
@pytest.mark.parametrize("first_name, second_name, expected_name", [
903910
('A', 'B', None), (None, 'B', None), ('A', None, None)])
904-
@pytest.mark.parametrize("sort", [None, False])
911+
@pytest.mark.parametrize("sort", [None, True, False])
905912
def test_union_name_preservation(self, first_list, second_list, first_name,
906913
second_name, expected_name, sort):
907914
first = Index(first_list, name=first_name)
908915
second = Index(second_list, name=second_name)
909-
union = first.union(second, sort=sort)
916+
917+
warning = FutureWarning if sort is None else None
918+
with tm.assert_produces_warning(warning):
919+
union = first.union(second, sort=sort)
910920

911921
vals = set(first_list).union(second_list)
912922

913923
if sort is None and len(first_list) > 0 and len(second_list) > 0:
914924
expected = Index(sorted(vals), name=expected_name)
915925
tm.assert_index_equal(union, expected)
926+
elif sort:
927+
expected = Index(sorted(vals), name=expected_name)
928+
tm.assert_index_equal(union, expected)
916929
else:
917930
expected = Index(vals, name=expected_name)
918931
assert tm.equalContents(union, expected)
919932

920-
@pytest.mark.parametrize("sort", [None, False])
933+
@pytest.mark.parametrize("sort", [None, True, False])
921934
def test_union_dt_as_obj(self, sort):
922935
# TODO: Replace with fixture
923-
firstCat = self.strIndex.union(self.dateIndex)
924-
secondCat = self.strIndex.union(self.strIndex)
936+
warning = FutureWarning if sort is None else None
937+
with tm.assert_produces_warning(warning, check_stacklevel=False):
938+
firstCat = self.strIndex.union(self.dateIndex, sort=sort)
939+
secondCat = self.strIndex.union(self.strIndex, sort=sort)
925940

926941
if self.dateIndex.dtype == np.object_:
927942
appended = np.append(self.strIndex, self.dateIndex)
@@ -934,15 +949,6 @@ def test_union_dt_as_obj(self, sort):
934949
tm.assert_contains_all(self.strIndex, secondCat)
935950
tm.assert_contains_all(self.dateIndex, firstCat)
936951

937-
@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
938-
'symmetric_difference'])
939-
def test_setops_disallow_true(self, method):
940-
idx1 = pd.Index(['a', 'b'])
941-
idx2 = pd.Index(['b', 'c'])
942-
943-
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
944-
getattr(idx1, method)(idx2, sort=True)
945-
946952
def test_map_identity_mapping(self):
947953
# GH 12766
948954
# TODO: replace with fixture
@@ -1709,7 +1715,9 @@ def test_tuple_union_bug(self, method, expected, sort):
17091715
(2, 'B'), (1, 'C'), (2, 'C')],
17101716
dtype=[('num', int), ('let', 'a1')]))
17111717

1712-
result = getattr(index1, method)(index2, sort=sort)
1718+
warning = FutureWarning if method == 'union' else None
1719+
with tm.assert_produces_warning(warning):
1720+
result = getattr(index1, method)(index2, sort=sort)
17131721
assert result.ndim == 1
17141722

17151723
expected = Index(expected)
@@ -1919,12 +1927,14 @@ def test_outer_join_sort(self):
19191927
left_index = Index(np.random.permutation(15))
19201928
right_index = tm.makeDateIndex(10)
19211929

1922-
with tm.assert_produces_warning(RuntimeWarning):
1930+
with tm.assert_produces_warning(RuntimeWarning,
1931+
raise_on_extra_warnings=False):
19231932
result = left_index.join(right_index, how='outer')
19241933

19251934
# right_index in this case because DatetimeIndex has join precedence
19261935
# over Int64Index
1927-
with tm.assert_produces_warning(RuntimeWarning):
1936+
with tm.assert_produces_warning(RuntimeWarning,
1937+
raise_on_extra_warnings=False):
19281938
expected = right_index.astype(object).union(
19291939
left_index.astype(object))
19301940

@@ -2235,7 +2245,8 @@ def test_union_base(self):
22352245
first = index[3:]
22362246
second = index[:5]
22372247

2238-
result = first.union(second)
2248+
with tm.assert_produces_warning(FutureWarning):
2249+
result = first.union(second)
22392250

22402251
expected = Index([0, 1, 2, 'a', 'b', 'c'])
22412252
tm.assert_index_equal(result, expected)
@@ -2248,7 +2259,8 @@ def test_union_different_type_base(self, klass):
22482259
first = index[3:]
22492260
second = index[:5]
22502261

2251-
result = first.union(klass(second.values))
2262+
with tm.assert_produces_warning(FutureWarning):
2263+
result = first.union(klass(second.values))
22522264

22532265
assert tm.equalContents(result, index)
22542266

0 commit comments

Comments
 (0)