Skip to content

DEPR: Deprecate sort=None for union and implement sort=True #25980

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2301,6 +2301,7 @@ def _union_incompatible_dtypes(self, other, sort):
-------
Index
"""

this = self.astype(object, copy=False)
# cast to Index for when `other` is list-like
other = Index(other).astype(object, copy=False)
Expand All @@ -2324,9 +2325,10 @@ def _is_compatible_with_other(self, other):
and is_dtype_equal(self.dtype, other.dtype))

def _validate_sort_keyword(self, sort):
if sort not in [None, False]:
if sort not in [None, True, False]:
raise ValueError("The 'sort' keyword only takes the values of "
"None or False; {0} was passed.".format(sort))
"None, True or False; {0} was "
"passed.".format(sort))

def union(self, other, sort=None):
"""
Expand All @@ -2350,6 +2352,12 @@ def union(self, other, sort=None):
3. Some values in `self` or `other` cannot be compared.
A RuntimeWarning is issued in this case.

.. deprecated:: 0.25.0

* True : Sort the result, except when some values in `self`
or `other` cannot be compared. A RuntimeWarning is issued
in this case

* False : do not sort the result.

.. versionadded:: 0.24.0
Expand Down Expand Up @@ -2408,11 +2416,22 @@ def _union(self, other, sort):
Index
"""

if sort is None:
warnings.warn("sort='None' is deprecated, and will be "
"removed in a future version.",
FutureWarning, stacklevel=3)

if not len(other) or self.equals(other):
return self._get_reconciled_name_object(other)
res = self._get_reconciled_name_object(other)
if sort:
res = res.sort_values()
return res

if not len(self):
return other._get_reconciled_name_object(self)
res = other._get_reconciled_name_object(self)
if sort:
res = res.sort_values()
return res

# TODO(EA): setops-refactor, clean all this up
if is_period_dtype(self) or is_datetime64tz_dtype(self):
Expand All @@ -2424,7 +2443,7 @@ def _union(self, other, sort):
else:
rvals = other._values

if sort is None and self.is_monotonic and other.is_monotonic:
if sort is not False and self.is_monotonic and other.is_monotonic:
try:
result = self._outer_indexer(lvals, rvals)[0]
except TypeError:
Expand All @@ -2446,7 +2465,7 @@ def _union(self, other, sort):
else:
result = lvals

if sort is None:
if sort is not False:
try:
result = sorting.safe_sort(result)
except TypeError as e:
Expand Down
10 changes: 0 additions & 10 deletions pandas/tests/indexes/multi/test_set_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,13 +358,3 @@ def test_union_sort_other_incomparable_sort():
idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']])
with pytest.raises(TypeError, match='Cannot compare'):
idx.union(idx[:1], sort=True)


@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
'symmetric_difference'])
def test_setops_disallow_true(method):
idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]])

with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
getattr(idx1, method)(idx2, sort=True)
121 changes: 66 additions & 55 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,32 +783,38 @@ def test_intersection_equal_sort_true(self):
sorted_ = pd.Index(['a', 'b', 'c'])
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)

@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize("sort", [None, True, False])
def test_chained_union(self, sort):
# Chained unions handles names correctly
i1 = Index([1, 2], name='i1')
i2 = Index([5, 6], name='i2')
i3 = Index([3, 4], name='i3')
union = i1.union(i2.union(i3, sort=sort), sort=sort)
expected = i1.union(i2, sort=sort).union(i3, sort=sort)

warning = FutureWarning if sort is None else None
with tm.assert_produces_warning(warning):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would just be easier to move the warnings to a separate test since this will eventually be ripped out anyway

union = i1.union(i2.union(i3, sort=sort), sort=sort)
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
tm.assert_index_equal(union, expected)

j1 = Index([1, 2], name='j1')
j2 = Index([], name='j2')
j3 = Index([], name='j3')
union = j1.union(j2.union(j3, sort=sort), sort=sort)
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
with tm.assert_produces_warning(warning, check_stacklevel=False):
union = j1.union(j2.union(j3, sort=sort), sort=sort)
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
tm.assert_index_equal(union, expected)

@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize("sort", [None, True, False])
def test_union(self, sort):
# TODO: Replace with fixturesult
first = self.strIndex[5:20]
second = self.strIndex[:10]
everything = self.strIndex[:20]

union = first.union(second, sort=sort)
if sort is None:
warning = FutureWarning if sort is None else None
with tm.assert_produces_warning(warning):
union = first.union(second, sort=sort)
if sort is not False:
tm.assert_index_equal(union, everything.sort_values())
assert tm.equalContents(union, everything)

Expand All @@ -819,21 +825,14 @@ def test_union_sort_other_special(self, slice_):
idx = pd.Index([1, 0, 2])
# default, sort=None
other = idx[slice_]
tm.assert_index_equal(idx.union(other), idx)
tm.assert_index_equal(other.union(idx), idx)
with tm.assert_produces_warning(FutureWarning):
tm.assert_index_equal(idx.union(other), idx)
tm.assert_index_equal(other.union(idx), idx)

# sort=False
tm.assert_index_equal(idx.union(other, sort=False), idx)

@pytest.mark.xfail(reason="Not implemented")
@pytest.mark.parametrize('slice_', [slice(None), slice(0)])
def test_union_sort_special_true(self, slice_):
# TODO decide on True behaviour
# sort=True
idx = pd.Index([1, 0, 2])
# default, sort=None
other = idx[slice_]

result = idx.union(other, sort=True)
expected = pd.Index([0, 1, 2])
tm.assert_index_equal(result, expected)
Expand All @@ -842,31 +841,29 @@ def test_union_sort_other_incomparable(self):
# https://github.com/pandas-dev/pandas/issues/24959
idx = pd.Index([1, pd.Timestamp('2000')])
# default (sort=None)
with tm.assert_produces_warning(RuntimeWarning):
with tm.assert_produces_warning(RuntimeWarning,
raise_on_extra_warnings=False):
result = idx.union(idx[:1])

tm.assert_index_equal(result, idx)

# sort=None
with tm.assert_produces_warning(RuntimeWarning):
with tm.assert_produces_warning(RuntimeWarning,
raise_on_extra_warnings=False):
result = idx.union(idx[:1], sort=None)
tm.assert_index_equal(result, idx)

# sort=True
with tm.assert_produces_warning(RuntimeWarning):
result = idx.union(idx[:1], sort=True)
tm.assert_index_equal(result, idx)

# sort=False
result = idx.union(idx[:1], sort=False)
tm.assert_index_equal(result, idx)

@pytest.mark.xfail(reason="Not implemented")
def test_union_sort_other_incomparable_true(self):
# TODO decide on True behaviour
# sort=True
idx = pd.Index([1, pd.Timestamp('2000')])
with pytest.raises(TypeError, match='.*'):
idx.union(idx[:1], sort=True)

@pytest.mark.parametrize("klass", [
np.array, Series, list])
@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize("sort", [None, True, False])
def test_union_from_iterables(self, klass, sort):
# GH 10149
# TODO: Replace with fixturesult
Expand All @@ -875,53 +872,70 @@ def test_union_from_iterables(self, klass, sort):
everything = self.strIndex[:20]

case = klass(second.values)
result = first.union(case, sort=sort)
if sort is None:

warning = FutureWarning if sort is None else None
with tm.assert_produces_warning(warning, check_stacklevel=False):
result = first.union(case, sort=sort)

if sort is not False:
tm.assert_index_equal(result, everything.sort_values())
assert tm.equalContents(result, everything)

@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize("sort", [None, True, False])
def test_union_identity(self, sort):
# TODO: replace with fixturesult
first = self.strIndex[5:20]

union = first.union(first, sort=sort)
warning = FutureWarning if sort is None else None
with tm.assert_produces_warning(warning):
union = first.union(first, sort=sort)

# i.e. identity is not preserved when sort is True
assert (union is first) is (not sort)

# This should no longer be the same object, since [] is not consistent,
# both objects will be recast to dtype('O')
union = first.union([], sort=sort)
with tm.assert_produces_warning(warning, check_stacklevel=False):
union = first.union([], sort=sort)
assert (union is first) is (not sort)

union = Index([]).union(first, sort=sort)
with tm.assert_produces_warning(warning):
union = Index([]).union(first, sort=sort)
assert (union is first) is (not sort)

@pytest.mark.parametrize("first_list", [list('ba'), list()])
@pytest.mark.parametrize("second_list", [list('ab'), list()])
@pytest.mark.parametrize("first_name, second_name, expected_name", [
('A', 'B', None), (None, 'B', None), ('A', None, None)])
@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize("sort", [None, True, False])
def test_union_name_preservation(self, first_list, second_list, first_name,
second_name, expected_name, sort):
first = Index(first_list, name=first_name)
second = Index(second_list, name=second_name)
union = first.union(second, sort=sort)

warning = FutureWarning if sort is None else None
with tm.assert_produces_warning(warning):
union = first.union(second, sort=sort)

vals = set(first_list).union(second_list)

if sort is None and len(first_list) > 0 and len(second_list) > 0:
expected = Index(sorted(vals), name=expected_name)
tm.assert_index_equal(union, expected)
elif sort:
expected = Index(sorted(vals), name=expected_name)
tm.assert_index_equal(union, expected)
else:
expected = Index(vals, name=expected_name)
assert tm.equalContents(union, expected)

@pytest.mark.parametrize("sort", [None, False])
@pytest.mark.parametrize("sort", [None, True, False])
def test_union_dt_as_obj(self, sort):
# TODO: Replace with fixturesult
firstCat = self.strIndex.union(self.dateIndex)
secondCat = self.strIndex.union(self.strIndex)
warning = FutureWarning if sort is None else None
with tm.assert_produces_warning(warning, check_stacklevel=False):
firstCat = self.strIndex.union(self.dateIndex, sort=sort)
secondCat = self.strIndex.union(self.strIndex, sort=sort)

if self.dateIndex.dtype == np.object_:
appended = np.append(self.strIndex, self.dateIndex)
Expand All @@ -934,15 +948,6 @@ def test_union_dt_as_obj(self, sort):
tm.assert_contains_all(self.strIndex, secondCat)
tm.assert_contains_all(self.dateIndex, firstCat)

@pytest.mark.parametrize("method", ['union', 'intersection', 'difference',
'symmetric_difference'])
def test_setops_disallow_true(self, method):
idx1 = pd.Index(['a', 'b'])
idx2 = pd.Index(['b', 'c'])

with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
getattr(idx1, method)(idx2, sort=True)

def test_map_identity_mapping(self):
# GH 12766
# TODO: replace with fixture
Expand Down Expand Up @@ -1712,7 +1717,9 @@ def test_tuple_union_bug(self, method, expected, sort):
(2, 'B'), (1, 'C'), (2, 'C')],
dtype=[('num', int), ('let', 'a1')]))

result = getattr(index1, method)(index2, sort=sort)
warning = FutureWarning if method == 'union' else None
with tm.assert_produces_warning(warning):
result = getattr(index1, method)(index2, sort=sort)
assert result.ndim == 1

expected = Index(expected)
Expand Down Expand Up @@ -1923,12 +1930,14 @@ def test_outer_join_sort(self):
left_index = Index(np.random.permutation(15))
right_index = tm.makeDateIndex(10)

with tm.assert_produces_warning(RuntimeWarning):
with tm.assert_produces_warning(RuntimeWarning,
raise_on_extra_warnings=False):
result = left_index.join(right_index, how='outer')

# right_index in this case because DatetimeIndex has join precedence
# over Int64Index
with tm.assert_produces_warning(RuntimeWarning):
with tm.assert_produces_warning(RuntimeWarning,
raise_on_extra_warnings=False):
expected = right_index.astype(object).union(
left_index.astype(object))

Expand Down Expand Up @@ -2239,7 +2248,8 @@ def test_union_base(self):
first = index[3:]
second = index[:5]

result = first.union(second)
with tm.assert_produces_warning(FutureWarning):
result = first.union(second)

expected = Index([0, 1, 2, 'a', 'b', 'c'])
tm.assert_index_equal(result, expected)
Expand All @@ -2252,7 +2262,8 @@ def test_union_different_type_base(self, klass):
first = index[3:]
second = index[:5]

result = first.union(klass(second.values))
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = first.union(klass(second.values))

assert tm.equalContents(result, index)

Expand Down