-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: fix union_indexes not supporting sort=False for Index subclasses #35098
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
148a589
8a51ded
371fd6e
e5f1e68
9701bb5
3208fbe
a15cef4
933956c
4ecd83e
896d62a
950b279
4b2076d
6f2f3c9
0a6c62e
9ca48f7
f41b7e7
2fe2c9d
cefd5c9
605ac21
18abbae
81116d4
a7ec4bf
e85529d
683f615
c76859e
11773a5
836fc1a
b9d5ab4
ee7048e
1f67a3c
c722154
ab291a7
93a4ccc
93452c1
fb3a906
0479618
0f96dbe
ada7346
c3cbecc
7cc4d9d
4c1cc42
cc0d167
54140af
1c371bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -206,6 +206,17 @@ def conv(i): | |
|
||
return Index(lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) | ||
|
||
# GH 35092. Detect if we have an Index type, for which the sort | ||
# setting doesn't make sense | ||
ind_types = list({type(index) for index in indexes}) | ||
gfyoung marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if any( | ||
ind_type in [MultiIndex, RangeIndex, DatetimeIndex, CategoricalIndex] | ||
gfyoung marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gfyoung Turns out that This is the last example of each of these types of indices breaking sorting expectations somewhere. |
||
for ind_type in ind_types | ||
): | ||
ignore_sort = True | ||
else: | ||
ignore_sort = False | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we let any of these index type not get sorted, we'll break some tests where we assume that the results of joining on any of these come out sorted. Moreover, there isn't much point to having any of them unsorted except for pretty printing purposes, in my opinion. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you show what tests break? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i would rather just pass thru and adjust the tests There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
if kind == "special": | ||
result = indexes[0] | ||
|
||
|
@@ -214,7 +225,11 @@ def conv(i): | |
return result.union_many(indexes[1:]) | ||
else: | ||
for other in indexes[1:]: | ||
result = result.union(other) | ||
# GH 35092. Pass sort to Index.union | ||
# Index.union expects sort=None instead of sort=True | ||
if sort: | ||
sort = None | ||
result = result.union(other, sort=sort) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return result | ||
elif kind == "array": | ||
index = indexes[0] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,8 +13,9 @@ | |
from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion | ||
|
||
import pandas as pd | ||
from pandas import CategoricalIndex, MultiIndex, RangeIndex | ||
from pandas import CategoricalIndex, Index, MultiIndex, RangeIndex | ||
import pandas._testing as tm | ||
from pandas.core.indexes.api import union_indexes | ||
|
||
|
||
class TestCommon: | ||
|
@@ -395,3 +396,15 @@ def test_astype_preserves_name(self, index, dtype, copy): | |
assert result.names == index.names | ||
else: | ||
assert result.name == index.name | ||
|
||
|
||
@pytest.mark.parametrize("exp_arr, sort", [([0, 1, 4, 3], False), ([0, 1, 3, 4], True)]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use the index fixture There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. Using the |
||
@pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) | ||
def test_union_index_no_sort(exp_arr, sort, dtype): | ||
# GH 35092. Check that we don't sort with sort=False | ||
ind1 = Index([0, 1], dtype=dtype) | ||
ind2 = Index([4, 3], dtype=dtype) | ||
|
||
expected = Index(exp_arr, dtype=dtype) | ||
result = union_indexes([ind1, ind2], sort=sort) | ||
tm.assert_index_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you don't need the first part (as this is a private function); move to reshaping section of Bug Fixes