Skip to content

Commit bdc5a67

Browse files
authored
BUG: unstack with missing levels results in incorrect index names (#38029)
1 parent 906d3b6 commit bdc5a67

File tree

4 files changed

+39
-1
lines changed

4 files changed

+39
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ Groupby/resample/rolling
290290
Reshaping
291291
^^^^^^^^^
292292

293-
-
293+
- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
294294
-
295295

296296
Sparse

pandas/core/indexes/multi.py

+3
Original file line numberDiff line numberDiff line change
@@ -1979,6 +1979,9 @@ def remove_unused_levels(self):
19791979
has_na = int(len(uniques) and (uniques[0] == -1))
19801980

19811981
if len(uniques) != len(lev) + has_na:
1982+
1983+
if lev.isna().any() and len(uniques) == len(lev):
1984+
break
19821985
# We have unused levels
19831986
changed = True
19841987

pandas/tests/frame/test_stack_unstack.py

+24
Original file line numberDiff line numberDiff line change
@@ -1907,3 +1907,27 @@ def test_unstack_with_missing_int_cast_to_float(self):
19071907
),
19081908
)
19091909
tm.assert_frame_equal(result, expected)
1910+
1911+
def test_unstack_with_level_has_nan(self):
1912+
# GH 37510
1913+
df1 = DataFrame(
1914+
{
1915+
"L1": [1, 2, 3, 4],
1916+
"L2": [3, 4, 1, 2],
1917+
"L3": [1, 1, 1, 1],
1918+
"x": [1, 2, 3, 4],
1919+
}
1920+
)
1921+
df1 = df1.set_index(["L1", "L2", "L3"])
1922+
new_levels = ["n1", "n2", "n3", None]
1923+
df1.index = df1.index.set_levels(levels=new_levels, level="L1")
1924+
df1.index = df1.index.set_levels(levels=new_levels, level="L2")
1925+
1926+
result = df1.unstack("L3")[("x", 1)].sort_index().index
1927+
expected = MultiIndex(
1928+
levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]],
1929+
codes=[[0, 1, 2, 3], [2, 3, 0, 1]],
1930+
names=["L1", "L2"],
1931+
)
1932+
1933+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/multi/test_sorting.py

+11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex
99
import pandas._testing as tm
10+
from pandas.core.indexes.frozen import FrozenList
1011

1112

1213
def test_sortlevel(idx):
@@ -271,3 +272,13 @@ def test_argsort(idx):
271272
result = idx.argsort()
272273
expected = idx.values.argsort()
273274
tm.assert_numpy_array_equal(result, expected)
275+
276+
277+
def test_remove_unused_levels_with_nan():
    """Check ``remove_unused_levels`` on a MultiIndex whose levels contain NaN.

    Regression test for GH 37510.
    """
    # GH 37510
    # Build a two-level index from tuples; the first tuple has NaN in "id2".
    idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"])

    # Replace the "id1" level values with a set that itself contains NaN.
    idx = idx.set_levels(["a", np.nan], level="id1")
    idx = idx.remove_unused_levels()

    result = idx.levels
    expected = FrozenList([["a", np.nan], [4]])

    # The two sides are compared through their string forms, not element-wise.
    assert str(result) == str(expected)

0 commit comments

Comments
 (0)