Skip to content

Commit e4f7bc7

Browse files
arw2019 authored and Kevin D Smith committed
BUG: in DataFrame.reset_index() only call maybe_upcast_putmask with ndarrays (pandas-dev#36876)
* BUG: add check so maybe_upcast_putmask is only called with ndarray
* TST: add tests
* DOC: add whatsnew
* feedback: test roundtrip
* feedback: parametrize on both examples from OP
* move test to frame/test_alter_axes.py
* fill mask in index with nan when not calling maybe_upcast_putmask
* restore the fix
1 parent 1ed7598 commit e4f7bc7

File tree

3 files changed

+51
-16
lines changed

3 files changed

+51
-16
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ Indexing
392392
- Bug in :meth:`Index.sort_values` where, when empty values were passed, the method would break by trying to compare missing values instead of pushing them to the end of the sort order. (:issue:`35584`)
393393
- Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where int64 arrays are returned instead of intp. (:issue:`36359`)
394394
- Bug in :meth:`DataFrame.sort_index` where parameter ascending passed as a list on a single level index gives wrong result. (:issue:`32334`)
395+
- Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`)
395396

396397
Missing
397398
^^^^^^^

pandas/core/dtypes/cast.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,10 @@ def maybe_casted_values(index, codes=None):
493493
values = values._data # TODO: can we de-kludge yet?
494494

495495
if mask.any():
496-
values, _ = maybe_upcast_putmask(values, mask, np.nan)
496+
if isinstance(values, np.ndarray):
497+
values, _ = maybe_upcast_putmask(values, mask, np.nan)
498+
else:
499+
values[mask] = np.nan
497500

498501
if issubclass(values_type, DatetimeLikeArrayMixin):
499502
values = values_type(values, dtype=values_dtype)

pandas/tests/frame/test_alter_axes.py

+46-15
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212

1313
from pandas import (
1414
Categorical,
15+
CategoricalIndex,
1516
DataFrame,
1617
DatetimeIndex,
1718
Index,
1819
IntervalIndex,
20+
MultiIndex,
1921
Series,
2022
Timestamp,
2123
cut,
@@ -171,21 +173,6 @@ def test_assign_columns(self, float_frame):
171173
tm.assert_series_equal(float_frame["C"], df["baz"], check_names=False)
172174
tm.assert_series_equal(float_frame["hi"], df["foo2"], check_names=False)
173175

174-
def test_set_index_preserve_categorical_dtype(self):
175-
# GH13743, GH13854
176-
df = DataFrame(
177-
{
178-
"A": [1, 2, 1, 1, 2],
179-
"B": [10, 16, 22, 28, 34],
180-
"C1": Categorical(list("abaab"), categories=list("bac"), ordered=False),
181-
"C2": Categorical(list("abaab"), categories=list("bac"), ordered=True),
182-
}
183-
)
184-
for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]:
185-
result = df.set_index(cols).reset_index()
186-
result = result.reindex(columns=df.columns)
187-
tm.assert_frame_equal(result, df)
188-
189176
def test_rename_signature(self):
190177
sig = inspect.signature(DataFrame.rename)
191178
parameters = set(sig.parameters)
@@ -266,3 +253,47 @@ def test_set_reset_index(self):
266253
df = df.set_index("B")
267254

268255
df = df.reset_index()
256+
257+
258+
class TestCategoricalIndex:
259+
def test_set_index_preserve_categorical_dtype(self):
260+
# GH13743, GH13854
261+
df = DataFrame(
262+
{
263+
"A": [1, 2, 1, 1, 2],
264+
"B": [10, 16, 22, 28, 34],
265+
"C1": Categorical(list("abaab"), categories=list("bac"), ordered=False),
266+
"C2": Categorical(list("abaab"), categories=list("bac"), ordered=True),
267+
}
268+
)
269+
for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]:
270+
result = df.set_index(cols).reset_index()
271+
result = result.reindex(columns=df.columns)
272+
tm.assert_frame_equal(result, df)
273+
274+
@pytest.mark.parametrize(
275+
"codes", ([[0, 0, 1, 1], [0, 1, 0, 1]], [[0, 0, -1, 1], [0, 1, 0, 1]])
276+
)
277+
def test_reindexing_with_missing_values(self, codes):
278+
# GH 24206
279+
280+
index = MultiIndex(
281+
[CategoricalIndex(["A", "B"]), CategoricalIndex(["a", "b"])], codes
282+
)
283+
data = {"col": range(len(index))}
284+
df = DataFrame(data=data, index=index)
285+
286+
expected = DataFrame(
287+
{
288+
"level_0": Categorical.from_codes(codes[0], categories=["A", "B"]),
289+
"level_1": Categorical.from_codes(codes[1], categories=["a", "b"]),
290+
"col": range(4),
291+
}
292+
)
293+
294+
res = df.reset_index()
295+
tm.assert_frame_equal(res, expected)
296+
297+
# roundtrip
298+
res = expected.set_index(["level_0", "level_1"]).reset_index()
299+
tm.assert_frame_equal(res, expected)

0 commit comments

Comments
 (0)