Skip to content

Commit 7026169

Browse files
BUG: unstack with sort=False fails when used with the level parameter (pandas-dev#54987)
When sort=False, assign new codes to each level's labels in order of first appearance. This is done so that the data appears to be already sorted, which fixes the bug.
1 parent 0e8174f commit 7026169

File tree

2 files changed

+40
-12
lines changed

2 files changed

+40
-12
lines changed

pandas/core/reshape/reshape.py

+25-12
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,16 @@ def __init__(
153153

154154
self._make_selectors()
155155

156+
@cache_readonly
157+
def sorted_labels(self) -> list[np.ndarray]:
158+
if self.sort:
159+
return self.labels
160+
161+
v = self.level
162+
codes = list(self.index.codes)
163+
to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
164+
return to_sort
165+
156166
@cache_readonly
157167
def _indexer_and_to_sort(
158168
self,
@@ -162,8 +172,14 @@ def _indexer_and_to_sort(
162172
]:
163173
v = self.level
164174

165-
codes = list(self.index.codes)
166175
levs = list(self.index.levels)
176+
codes = list(self.index.codes)
177+
178+
if not self.sort:
179+
codes = [list(l) for l in codes]
180+
ids_code = [(dict([(y, x) for x, y in enumerate(sorted(set(l), key=l.index))]), l) for l in codes]
181+
codes = [np.array([d[x] for x in code]) for d, code in ids_code]
182+
167183
to_sort = codes[:v] + codes[v + 1 :] + [codes[v]]
168184
sizes = tuple(len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]])
169185

@@ -172,27 +188,24 @@ def _indexer_and_to_sort(
172188

173189
indexer = get_group_index_sorter(comp_index, ngroups)
174190
return indexer, to_sort
175-
191+
176192
@cache_readonly
177-
def sorted_labels(self) -> list[np.ndarray]:
193+
def labels(self) -> list[np.ndarray]:
178194
indexer, to_sort = self._indexer_and_to_sort
179195
if self.sort:
180196
return [line.take(indexer) for line in to_sort]
181197
return to_sort
182198

183199
def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
184-
if self.sort:
185-
indexer, _ = self._indexer_and_to_sort
186-
187-
sorted_values = algos.take_nd(values, indexer, axis=0)
188-
return sorted_values
189-
return values
200+
indexer, _ = self._indexer_and_to_sort
201+
sorted_values = algos.take_nd(values, indexer, axis=0)
202+
return sorted_values
190203

191204
def _make_selectors(self):
192205
new_levels = self.new_index_levels
193206

194-
# make the mask
195-
remaining_labels = self.sorted_labels[:-1]
207+
remaining_labels = self.labels[:-1]
208+
choosen_labels = self.labels[-1]
196209
level_sizes = tuple(len(x) for x in new_levels)
197210

198211
comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes)
@@ -202,7 +215,7 @@ def _make_selectors(self):
202215
stride = self.index.levshape[self.level] + self.lift
203216
self.full_shape = ngroups, stride
204217

205-
selector = self.sorted_labels[-1] + stride * comp_index + self.lift
218+
selector = choosen_labels + stride * comp_index + self.lift
206219
mask = np.zeros(np.prod(self.full_shape), dtype=bool)
207220
mask.put(selector, True)
208221

pandas/tests/frame/test_stack_unstack.py

+15
Original file line numberDiff line numberDiff line change
@@ -1318,6 +1318,21 @@ def test_unstack_sort_false(frame_or_series, dtype):
13181318
[("two", "z", "b"), ("two", "y", "a"), ("one", "z", "b"), ("one", "y", "a")]
13191319
)
13201320
obj = frame_or_series(np.arange(1.0, 5.0), index=index, dtype=dtype)
1321+
1322+
result = obj.unstack(level=0, sort=False)
1323+
1324+
if frame_or_series is DataFrame:
1325+
expected_columns = MultiIndex.from_tuples([(0, "two"), (0, "one")])
1326+
else:
1327+
expected_columns = ["two", "one"]
1328+
expected = DataFrame(
1329+
[[1.0, 3.0], [2.0, 4.0]],
1330+
index=MultiIndex.from_tuples([('z', 'b'), ('y', 'a')]),
1331+
columns=expected_columns,
1332+
dtype=dtype,
1333+
)
1334+
tm.assert_frame_equal(result, expected)
1335+
13211336
result = obj.unstack(level=-1, sort=False)
13221337

13231338
if frame_or_series is DataFrame:

0 commit comments

Comments
 (0)