Skip to content

BUG: Wrong dtype when resetting a multiindex with missing values. (#19602) #27370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
40 changes: 13 additions & 27 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4610,33 +4610,19 @@ def _maybe_casted_values(index, labels=None):

# if we have the labels, extract the values with a mask
if labels is not None:
mask = labels == -1

# we can have situations where the whole mask is -1,
# meaning there is nothing found in labels, so make all nan's
if mask.all():
values = np.empty(len(mask))
values.fill(np.nan)
if isinstance(values, np.ndarray):
mask = labels == -1
# every label can be -1 (i.e. the mask is all True), meaning no value of
# this level is present in labels, so fill the result with NaN
if mask.all():
values = np.empty(len(mask), dtype=values.dtype)
values.fill(np.nan)
else:
values = values.take(labels)
if mask.any():
values, _ = maybe_upcast_putmask(values, mask, np.nan)
else:
values = values.take(labels)

# TODO(https://github.com/pandas-dev/pandas/issues/24206)
# Push this into maybe_upcast_putmask?
# We can't pass EAs there right now. Looks a bit
# complicated.
# So we unbox the ndarray_values, op, re-box.
values_type = type(values)
values_dtype = values.dtype

if issubclass(values_type, DatetimeLikeArray):
values = values._data

if mask.any():
values, changed = maybe_upcast_putmask(values, mask, np.nan)

if issubclass(values_type, DatetimeLikeArray):
values = values_type(values, dtype=values_dtype)

values = values.take(labels, allow_fill=True)
return values

new_index = ibase.default_index(len(new_obj))
Expand Down Expand Up @@ -4680,7 +4666,7 @@ def _maybe_casted_values(index, labels=None):
missing = self.columns.nlevels - len(name_lst)
name_lst += [col_fill] * missing
name = tuple(name_lst)
# to ndarray and maybe infer different dtype
# to array-like and maybe infer different dtype
level_values = _maybe_casted_values(lev, lab)
new_obj.insert(0, name, level_values)

Expand Down
31 changes: 31 additions & 0 deletions pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
Expand Down Expand Up @@ -1186,6 +1187,36 @@ def test_reset_index_multiindex_nan(self):
rs = df.set_index(["A", "B"]).reset_index()
tm.assert_frame_equal(rs, df)

# GH 19602
df = DataFrame({0: DatetimeIndex([]), 1: []})
rs = df.set_index([0, 1]).reset_index()
tm.assert_frame_equal(rs, df)

idx = MultiIndex(
levels=[DatetimeIndex([]),
DatetimeIndex(['2015-01-01 11:00:00'])],
codes=[[-1, -1], [0, -1]],
names=[0, 1]
)
df = DataFrame(index=idx).reset_index()

xp = DataFrame({
0: DatetimeIndex([np.nan, np.nan]),
1: DatetimeIndex(['2015-01-01 11:00:00', np.nan])
})
tm.assert_frame_equal(df, xp)

# GH 24206
idx = MultiIndex([CategoricalIndex(['A', 'B']), CategoricalIndex(['a', 'b'])],
[[0, 0, 1, 1], [0, 1, 0, -1]])
df = DataFrame({'col': range(len(idx))}, index=idx).reset_index()
xp = DataFrame({
'level_0': CategoricalIndex(['A', 'A', 'B', 'B']),
'level_1': CategoricalIndex(['a', 'b', 'a', np.nan]),
'col': [0, 1, 2, 3]
})
tm.assert_frame_equal(df, xp)

def test_reset_index_with_datetimeindex_cols(self):
# GH5818
#
Expand Down