Skip to content

Revert "BUG: DataFrame.stack with sort=True and unsorted MultiIndex levels" #53760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,6 @@ Reshaping
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
- Bug in :meth:`DataFrame.merge` not merging correctly when having a ``MultiIndex`` with a single level (:issue:`52331`)
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
- Bug in :meth:`DataFrame.stack` where results were incorrectly ordered when ``sort=True`` and the input had :class:`MultiIndex` levels that were not sorted (:issue:`53636`)
- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
- Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`)
-
Expand Down
11 changes: 1 addition & 10 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,16 +756,7 @@ def _convert_level_number(level_num: int, columns: Index):
level_vals = mi_cols.levels[-1]
level_codes = unique(mi_cols.codes[-1])
if sort:
_, index, inverse = np.unique(
level_vals, return_index=True, return_inverse=True
)
sorted_level_vals = np.take(level_vals, index)
level_codes = np.sort(level_codes)
# Take level_codes according to where level_vals get sorted to, while
# also allowing for NA (-1) values
level_codes = np.where(level_codes == -1, -1, np.take(inverse, level_codes))
else:
sorted_level_vals = level_vals
level_vals_nan = level_vals.insert(len(level_vals), None)

level_vals_used = np.take(level_vals_nan, level_codes)
Expand Down Expand Up @@ -827,7 +818,7 @@ def _convert_level_number(level_num: int, columns: Index):
new_codes = [old_codes.repeat(levsize)]
new_names = [this.index.name] # something better?

new_levels.append(sorted_level_vals)
new_levels.append(level_vals)
new_codes.append(np.tile(level_codes, N))
new_names.append(frame.columns.names[level_num])

Expand Down
38 changes: 8 additions & 30 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -2000,20 +2000,18 @@ def __init__(self, *args, **kwargs) -> None:
),
)
@pytest.mark.parametrize("stack_lev", range(2))
@pytest.mark.parametrize("sort", [True, False])
def test_stack_order_with_unsorted_levels(self, levels, stack_lev, sort):
def test_stack_order_with_unsorted_levels(self, levels, stack_lev):
# GH#16323
# deep check for 1-row case
columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
df = DataFrame(columns=columns, data=[range(4)])
df_stacked = df.stack(stack_lev, sort=sort)
for row in df.index:
for col in df.columns:
expected = df.loc[row, col]
result_row = row, col[stack_lev]
result_col = col[1 - stack_lev]
result = df_stacked.loc[result_row, result_col]
assert result == expected
df_stacked = df.stack(stack_lev)
assert all(
df.loc[row, col]
== df_stacked.loc[(row, col[stack_lev]), col[1 - stack_lev]]
for row in df.index
for col in df.columns
)

def test_stack_order_with_unsorted_levels_multi_row(self):
# GH#16323
Expand All @@ -2032,26 +2030,6 @@ def test_stack_order_with_unsorted_levels_multi_row(self):
for col in df.columns
)

def test_stack_order_with_unsorted_levels_multi_row_2(self):
# GH#53636
levels = ((0, 1), (1, 0))
stack_lev = 1
columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
df = DataFrame(columns=columns, data=[range(4)], index=[1, 0, 2, 3])
result = df.stack(stack_lev, sort=True)
expected_index = MultiIndex(
levels=[[0, 1, 2, 3], [0, 1]],
codes=[[1, 1, 0, 0, 2, 2, 3, 3], [1, 0, 1, 0, 1, 0, 1, 0]],
)
expected = DataFrame(
{
0: [0, 1, 0, 1, 0, 1, 0, 1],
1: [2, 3, 2, 3, 2, 3, 2, 3],
},
index=expected_index,
)
tm.assert_frame_equal(result, expected)

def test_stack_unstack_unordered_multiindex(self):
# GH# 18265
values = np.arange(5)
Expand Down