Skip to content

Commit 92570aa

Browse files
authored
Revert "BUG: DataFrame.stack with sort=True and unsorted MultiIndex levels (#53637)"
This reverts commit 5edc2cc.
1 parent d36da2b commit 92570aa

File tree

3 files changed

+9
-41
lines changed

3 files changed

+9
-41
lines changed

doc/source/whatsnew/v2.1.0.rst

-1
Original file line number | Diff line number | Diff line change
@@ -491,7 +491,6 @@ Reshaping
491491
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
492492
- Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`)
493493
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
494-
- Bug in :meth:`DataFrame.stack` would incorrectly order results when ``sort=True`` and the input had :class:`MultiIndex` levels that were not sorted (:issue:`53636`)
495494
- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
496495
- Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`)
497496
-

pandas/core/reshape/reshape.py

+1 -10
Original file line number | Diff line number | Diff line change
@@ -756,16 +756,7 @@ def _convert_level_number(level_num: int, columns: Index):
756756
level_vals = mi_cols.levels[-1]
757757
level_codes = unique(mi_cols.codes[-1])
758758
if sort:
759-
_, index, inverse = np.unique(
760-
level_vals, return_index=True, return_inverse=True
761-
)
762-
sorted_level_vals = np.take(level_vals, index)
763759
level_codes = np.sort(level_codes)
764-
# Take level_codes according to where level_vals get sorted to, while
765-
# also allowing for NA (-1) values
766-
level_codes = np.where(level_codes == -1, -1, np.take(inverse, level_codes))
767-
else:
768-
sorted_level_vals = level_vals
769760
level_vals_nan = level_vals.insert(len(level_vals), None)
770761

771762
level_vals_used = np.take(level_vals_nan, level_codes)
@@ -827,7 +818,7 @@ def _convert_level_number(level_num: int, columns: Index):
827818
new_codes = [old_codes.repeat(levsize)]
828819
new_names = [this.index.name] # something better?
829820

830-
new_levels.append(sorted_level_vals)
821+
new_levels.append(level_vals)
831822
new_codes.append(np.tile(level_codes, N))
832823
new_names.append(frame.columns.names[level_num])
833824

pandas/tests/frame/test_stack_unstack.py

+8 -30
Original file line number | Diff line number | Diff line change
@@ -2000,20 +2000,18 @@ def __init__(self, *args, **kwargs) -> None:
20002000
),
20012001
)
20022002
@pytest.mark.parametrize("stack_lev", range(2))
2003-
@pytest.mark.parametrize("sort", [True, False])
2004-
def test_stack_order_with_unsorted_levels(self, levels, stack_lev, sort):
2003+
def test_stack_order_with_unsorted_levels(self, levels, stack_lev):
20052004
# GH#16323
20062005
# deep check for 1-row case
20072006
columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
20082007
df = DataFrame(columns=columns, data=[range(4)])
2009-
df_stacked = df.stack(stack_lev, sort=sort)
2010-
for row in df.index:
2011-
for col in df.columns:
2012-
expected = df.loc[row, col]
2013-
result_row = row, col[stack_lev]
2014-
result_col = col[1 - stack_lev]
2015-
result = df_stacked.loc[result_row, result_col]
2016-
assert result == expected
2008+
df_stacked = df.stack(stack_lev)
2009+
assert all(
2010+
df.loc[row, col]
2011+
== df_stacked.loc[(row, col[stack_lev]), col[1 - stack_lev]]
2012+
for row in df.index
2013+
for col in df.columns
2014+
)
20172015

20182016
def test_stack_order_with_unsorted_levels_multi_row(self):
20192017
# GH#16323
@@ -2032,26 +2030,6 @@ def test_stack_order_with_unsorted_levels_multi_row(self):
20322030
for col in df.columns
20332031
)
20342032

2035-
def test_stack_order_with_unsorted_levels_multi_row_2(self):
2036-
# GH#53636
2037-
levels = ((0, 1), (1, 0))
2038-
stack_lev = 1
2039-
columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2040-
df = DataFrame(columns=columns, data=[range(4)], index=[1, 0, 2, 3])
2041-
result = df.stack(stack_lev, sort=True)
2042-
expected_index = MultiIndex(
2043-
levels=[[0, 1, 2, 3], [0, 1]],
2044-
codes=[[1, 1, 0, 0, 2, 2, 3, 3], [1, 0, 1, 0, 1, 0, 1, 0]],
2045-
)
2046-
expected = DataFrame(
2047-
{
2048-
0: [0, 1, 0, 1, 0, 1, 0, 1],
2049-
1: [2, 3, 2, 3, 2, 3, 2, 3],
2050-
},
2051-
index=expected_index,
2052-
)
2053-
tm.assert_frame_equal(result, expected)
2054-
20552033
def test_stack_unstack_unordered_multiindex(self):
20562034
# GH# 18265
20572035
values = np.arange(5)

0 commit comments

Comments
 (0)