Skip to content

Commit d0f2c75

Browse files
nrebenaproost
authored and committed
Issue 28518 multiindex intersection (pandas-dev#28735)
1 parent 94d73e4 commit d0f2c75

File tree

5 files changed

+100
-6
lines changed

5 files changed

+100
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ Missing
251251
MultiIndex
252252
^^^^^^^^^^
253253

254+
- Constructor for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if the ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`)
254255
-
255256
-
256257

@@ -292,6 +293,7 @@ Reshaping
292293
- Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`)
293294
- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`)
294295
- Bug in :func:`merge_asof` where a :class:`datetime.timedelta` could not be used for the ``tolerance`` kwarg (:issue:`28098`)
296+
- Bug in :func:`merge` where suffixes were not appended correctly with a :class:`MultiIndex` (:issue:`28518`)
295297

296298
Sparse
297299
^^^^^^

pandas/core/indexes/multi.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,15 @@ def _verify_integrity(self, codes=None, levels=None):
365365
"Level values must be unique: {values} on "
366366
"level {level}".format(values=[value for value in level], level=i)
367367
)
368+
if self.sortorder is not None:
369+
if self.sortorder > self._lexsort_depth():
370+
raise ValueError(
371+
"Value for sortorder must be inferior or equal "
372+
"to actual lexsort_depth: "
373+
"sortorder {sortorder} with lexsort_depth {lexsort_depth}".format(
374+
sortorder=self.sortorder, lexsort_depth=self._lexsort_depth()
375+
)
376+
)
368377

369378
codes = [
370379
self._validate_codes(level, code) for level, code in zip(levels, codes)
@@ -1788,16 +1797,23 @@ def is_lexsorted(self):
17881797
@cache_readonly
17891798
def lexsort_depth(self):
17901799
if self.sortorder is not None:
1791-
if self.sortorder == 0:
1792-
return self.nlevels
1793-
else:
1794-
return 0
1800+
return self.sortorder
1801+
1802+
return self._lexsort_depth()
17951803

1804+
def _lexsort_depth(self) -> int:
1805+
"""
1806+
Compute and return the lexsort_depth, the number of levels of the
1807+
MultiIndex that are sorted lexically.
1808+
1809+
Returns
1810+
-------
1811+
int
1812+
"""
17961813
int64_codes = [ensure_int64(level_codes) for level_codes in self.codes]
17971814
for k in range(self.nlevels, 0, -1):
17981815
if libalgos.is_lexsorted(int64_codes[:k]):
17991816
return k
1800-
18011817
return 0
18021818

18031819
def _sort_levels_monotonic(self):

pandas/tests/indexes/multi/test_indexing.py

-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ def test_slice_locs_not_contained():
9696
index = MultiIndex(
9797
levels=[[0, 2, 4, 6], [0, 2, 4]],
9898
codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]],
99-
sortorder=0,
10099
)
101100

102101
result = index.slice_locs((1, 0), (5, 2))

pandas/tests/reshape/merge/test_merge.py

+30
Original file line numberDiff line numberDiff line change
@@ -2094,3 +2094,33 @@ def test_merge_equal_cat_dtypes2():
20942094

20952095
# Categorical is unordered, so don't check ordering.
20962096
tm.assert_frame_equal(result, expected, check_categorical=False)
2097+
2098+
2099+
def test_merge_multiindex_columns():
2100+
# Issue #28518
2101+
# Verify that merging two dataframes gives the expected labels
2102+
# The root cause of this issue was a bug in lexsort_depth, which is tested in
2103+
# test_lexsort_depth
2104+
2105+
letters = ["a", "b", "c", "d"]
2106+
numbers = ["1", "2", "3"]
2107+
index = pd.MultiIndex.from_product((letters, numbers), names=["outer", "inner"])
2108+
2109+
frame_x = pd.DataFrame(columns=index)
2110+
frame_x["id"] = ""
2111+
frame_y = pd.DataFrame(columns=index)
2112+
frame_y["id"] = ""
2113+
2114+
l_suf = "_x"
2115+
r_suf = "_y"
2116+
result = frame_x.merge(frame_y, on="id", suffixes=((l_suf, r_suf)))
2117+
2118+
# Constructing the expected results
2119+
expected_labels = [l + l_suf for l in letters] + [l + r_suf for l in letters]
2120+
expected_index = pd.MultiIndex.from_product(
2121+
[expected_labels, numbers], names=["outer", "inner"]
2122+
)
2123+
expected = pd.DataFrame(columns=expected_index)
2124+
expected["id"] = ""
2125+
2126+
tm.assert_frame_equal(result, expected)

pandas/tests/test_multilevel.py

+47
Original file line numberDiff line numberDiff line change
@@ -2056,6 +2056,53 @@ def test_is_lexsorted(self):
20562056
assert not index.is_lexsorted()
20572057
assert index.lexsort_depth == 0
20582058

2059+
def test_raise_invalid_sortorder(self):
2060+
# Test that the MultiIndex constructor raises on an incorrect sortorder
2061+
# Issue #28518
2062+
2063+
levels = [[0, 1], [0, 1, 2]]
2064+
2065+
# Correct sortorder
2066+
MultiIndex(
2067+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
2068+
)
2069+
2070+
with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
2071+
MultiIndex(
2072+
levels=levels,
2073+
codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
2074+
sortorder=2,
2075+
)
2076+
2077+
with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
2078+
MultiIndex(
2079+
levels=levels,
2080+
codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
2081+
sortorder=1,
2082+
)
2083+
2084+
def test_lexsort_depth(self):
2085+
# Test that lexsort_depth returns the correct sortorder
2086+
# when it was given to the MultiIndex constructor.
2087+
# Issue #28518
2088+
2089+
levels = [[0, 1], [0, 1, 2]]
2090+
2091+
index = MultiIndex(
2092+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
2093+
)
2094+
assert index.lexsort_depth == 2
2095+
2096+
index = MultiIndex(
2097+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
2098+
)
2099+
assert index.lexsort_depth == 1
2100+
2101+
index = MultiIndex(
2102+
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
2103+
)
2104+
assert index.lexsort_depth == 0
2105+
20592106
def test_sort_index_and_reconstruction(self):
20602107

20612108
# 15622

0 commit comments

Comments
 (0)