Skip to content

Commit 565f982

Browse files
committed
BUG: lexsort_depth now returns sortorder if sortorder is not None
This fixes issue pandas-dev#28518, where the labels of the merged index were invalid due to an inconsistent lexsort_depth property on the intersection of the indexes.
1 parent db15036 commit 565f982

File tree

3 files changed

+36
-28
lines changed

3 files changed

+36
-28
lines changed

pandas/core/indexes/multi.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,15 @@ def _verify_integrity(self, codes=None, levels=None):
365365
"Level values must be unique: {values} on "
366366
"level {level}".format(values=[value for value in level], level=i)
367367
)
368+
if self.sortorder is not None:
369+
if int(self.sortorder) > self._lexsort_depth():
370+
raise ValueError(
371+
"Value for sortorder must be inferior or equal "
372+
"to actual lexsort_depth: "
373+
"sortorder {sortorder} with lexsort_depth {lexsort_depth}".format(
374+
sortorder=self.sortorder, lexsort_depth=self._lexsort_depth()
375+
)
376+
)
368377

369378
codes = [
370379
self._validate_codes(level, code) for level, code in zip(levels, codes)
@@ -1783,16 +1792,15 @@ def is_lexsorted(self):
17831792
@cache_readonly
17841793
def lexsort_depth(self):
17851794
if self.sortorder is not None:
1786-
if self.sortorder == 0:
1787-
return self.nlevels
1788-
else:
1789-
return 0
1795+
return int(self.sortorder)
1796+
1797+
return self._lexsort_depth()
17901798

1799+
def _lexsort_depth(self):
17911800
int64_codes = [ensure_int64(level_codes) for level_codes in self.codes]
17921801
for k in range(self.nlevels, 0, -1):
17931802
if libalgos.is_lexsorted(int64_codes[:k]):
17941803
return k
1795-
17961804
return 0
17971805

17981806
def _sort_levels_monotonic(self):

pandas/tests/reshape/merge/test_merge.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -2095,13 +2095,14 @@ def test_merge_equal_cat_dtypes2():
20952095
# Categorical is unordered, so don't check ordering.
20962096
tm.assert_frame_equal(result, expected, check_categorical=False)
20972097

2098+
20982099
def test_merge_multiindex_columns():
20992100
# Issue #28518
21002101
# Verify that merging two dataframes give the expected labels
21012102
# The original cause of this issue come from a bug lexsort_depth and is tested in
21022103
# test_lexsort_depth
2103-
2104-
index_tuples=[]
2104+
2105+
index_tuples = []
21052106
letters = ["a", "b", "c", "d"]
21062107
numbers = ["1", "2", "3"]
21072108

@@ -2111,14 +2112,14 @@ def test_merge_multiindex_columns():
21112112

21122113
index = pd.MultiIndex.from_tuples(index_tuples, names=["outer", "inner"])
21132114

2114-
frame_x = pd.DataFrame(columns = index)
2115-
frame_x["id"]=""
2115+
frame_x = pd.DataFrame(columns=index)
2116+
frame_x["id"] = ""
21162117

2117-
frame_y = pd.DataFrame(columns = index)
2118-
frame_y["id"]=""
2118+
frame_y = pd.DataFrame(columns=index)
2119+
frame_y["id"] = ""
21192120

2120-
l_suf = '_x'
2121-
r_suf = '_y'
2121+
l_suf = "_x"
2122+
r_suf = "_y"
21222123
expected_labels = sum(([l + l_suf, l + r_suf] for l in letters), [])
21232124
merged_frame = frame_x.merge(frame_y, on="id", suffixes=((l_suf, r_suf))).columns
21242125
for label in expected_labels:

pandas/tests/test_multilevel.py

+14-15
Original file line numberDiff line numberDiff line change
@@ -2063,44 +2063,43 @@ def test_raise_invalid_sortorder(self):
20632063
levels = [[0, 1], [0, 1, 2]]
20642064

20652065
# Correct sortorder
2066-
index = MultiIndex(
2067-
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
2068-
sortorder=2,
2066+
MultiIndex(
2067+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
20692068
)
20702069

20712070
with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
2072-
index = MultiIndex(
2073-
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
2071+
MultiIndex(
2072+
levels=levels,
2073+
codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
20742074
sortorder=2,
20752075
)
20762076

20772077
with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
2078-
index = MultiIndex(
2079-
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
2078+
MultiIndex(
2079+
levels=levels,
2080+
codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
20802081
sortorder=1,
2081-
)
2082+
)
20822083

20832084
def test_lexsort_depth(self):
2084-
# Test that lexsort_depth return the correct sortorder when it was given to the MultiIndex const.
2085+
# Test that lexsort_depth return the correct sortorder
2086+
# when it was given to the MultiIndex const.
20852087
# Issue #28518
20862088

20872089
levels = [[0, 1], [0, 1, 2]]
20882090

20892091
index = MultiIndex(
2090-
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
2091-
sortorder=2,
2092+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
20922093
)
20932094
assert index.lexsort_depth == 2
20942095

20952096
index = MultiIndex(
2096-
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
2097-
sortorder=1,
2097+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
20982098
)
20992099
assert index.lexsort_depth == 1
21002100

21012101
index = MultiIndex(
2102-
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
2103-
sortorder=0,
2102+
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
21042103
)
21052104
assert index.lexsort_depth == 0
21062105

0 commit comments

Comments
 (0)