Skip to content

Commit b991274

Browse files
renanffernando, rhshadrach, mroeschke
authored
BUG: unstack with sort=False fails when used with the level parameter… (#56357)
* BUG: unstack with sort=False fails when used with the level parameter (#54987)

  Assign new codes to labels when sort=False. This is done so that the data appears to be already sorted, fixing the bug.

* Minor refactor and cleanup
* Cleanup & remove test
* whatsnew
* Revert test removal

---------

Co-authored-by: richard <[email protected]>
Co-authored-by: Richard Shadrach <[email protected]>
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent bdcb192 commit b991274

File tree

4 files changed

+30
-10
lines changed

4 files changed

+30
-10
lines changed

doc/source/whatsnew/v3.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ Groupby/resample/rolling
477477
Reshaping
478478
^^^^^^^^^
479479
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
480-
-
480+
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
481481

482482
Sparse
483483
^^^^^^

pandas/core/reshape/reshape.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ def _indexer_and_to_sort(
168168
v = self.level
169169

170170
codes = list(self.index.codes)
171+
if not self.sort:
172+
# Create new codes considering that labels are already sorted
173+
codes = [factorize(code)[0] for code in codes]
171174
levs = list(self.index.levels)
172175
to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
173176
sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]])
@@ -186,12 +189,9 @@ def sorted_labels(self) -> list[np.ndarray]:
186189
return to_sort
187190

188191
def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
189-
if self.sort:
190-
indexer, _ = self._indexer_and_to_sort
191-
192-
sorted_values = algos.take_nd(values, indexer, axis=0)
193-
return sorted_values
194-
return values
192+
indexer, _ = self._indexer_and_to_sort
193+
sorted_values = algos.take_nd(values, indexer, axis=0)
194+
return sorted_values
195195

196196
def _make_selectors(self) -> None:
197197
new_levels = self.new_index_levels
@@ -394,7 +394,13 @@ def _repeater(self) -> np.ndarray:
394394
@cache_readonly
395395
def new_index(self) -> MultiIndex | Index:
396396
# Does not depend on values or value_columns
397-
result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]]
397+
if self.sort:
398+
labels = self.sorted_labels[:-1]
399+
else:
400+
v = self.level
401+
codes = list(self.index.codes)
402+
labels = codes[:v] + codes[v + 1 :]
403+
result_codes = [lab.take(self.compressor) for lab in labels]
398404

399405
# construct the new index
400406
if len(self.new_index_levels) == 1:

pandas/tests/frame/test_stack_unstack.py

+15
Original file line numberDiff line numberDiff line change
@@ -1321,6 +1321,21 @@ def test_unstack_sort_false(frame_or_series, dtype):
13211321
[("two", "z", "b"), ("two", "y", "a"), ("one", "z", "b"), ("one", "y", "a")]
13221322
)
13231323
obj = frame_or_series(np.arange(1.0, 5.0), index=index, dtype=dtype)
1324+
1325+
result = obj.unstack(level=0, sort=False)
1326+
1327+
if frame_or_series is DataFrame:
1328+
expected_columns = MultiIndex.from_tuples([(0, "two"), (0, "one")])
1329+
else:
1330+
expected_columns = ["two", "one"]
1331+
expected = DataFrame(
1332+
[[1.0, 3.0], [2.0, 4.0]],
1333+
index=MultiIndex.from_tuples([("z", "b"), ("y", "a")]),
1334+
columns=expected_columns,
1335+
dtype=dtype,
1336+
)
1337+
tm.assert_frame_equal(result, expected)
1338+
13241339
result = obj.unstack(level=-1, sort=False)
13251340

13261341
if frame_or_series is DataFrame:

pandas/tests/reshape/test_pivot.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -2705,14 +2705,13 @@ def test_pivot_table_with_margins_and_numeric_column_names(self):
27052705
tm.assert_frame_equal(result, expected)
27062706

27072707
@pytest.mark.parametrize("m", [1, 10])
2708-
def test_unstack_shares_memory(self, m):
2708+
def test_unstack_copy(self, m):
27092709
# GH#56633
27102710
levels = np.arange(m)
27112711
index = MultiIndex.from_product([levels] * 2)
27122712
values = np.arange(m * m * 100).reshape(m * m, 100)
27132713
df = DataFrame(values, index, np.arange(100))
27142714
df_orig = df.copy()
27152715
result = df.unstack(sort=False)
2716-
assert np.shares_memory(df._values, result._values) is (m == 1)
27172716
result.iloc[0, 0] = -1
27182717
tm.assert_frame_equal(df, df_orig)

0 commit comments

Comments
 (0)