pandas-dev · ldmnt · Jul 12, 2019 · Jul 12, 2019 · Jul 13, 2019 · Jul 15, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -151,7 +151,8 @@ Missing
 MultiIndex
 ^^^^^^^^^^
 
--
+- Bug in :meth:`DataFrame.reset_index` where the dtype was sometimes not preserved for a :class:`MultiIndex` that is empty or has missing values (:issue:`19602`)
+- Bug in :meth:`DataFrame.reset_index` where it did not work for a :class:`MultiIndex` with :class:`CategoricalIndex` levels and missing values (:issue:`24206`)
 -
 
 I/O

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1391,3 +1391,41 @@ def maybe_cast_to_integer_array(arr, dtype, copy=False):
 
     if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)):
         raise ValueError("Trying to coerce float values to integers")
+
+
+def maybe_casted_values(index, codes=None):
+    """
+    Convert an index, given directly or as a pair (level, codes), to a 1D array.
+
+    Parameters
+    ----------
+    index : Index
+    codes : array of integers, optional
+        Positions to take from `index`; -1 marks a missing value.
+
+    Returns
+    -------
+    ExtensionArray or ndarray
+        The values of `index` if `codes` is None, else the values taken at
+        `codes`, with NaN (or the array's own fill value) where a code is -1.
+    """
+    values = index._values
+    if values.dtype == np.object_:
+        values = lib.maybe_convert_objects(values)
+
+    # if we have the codes, extract the values with a mask
+    if codes is not None:
+        if isinstance(values, np.ndarray):
+            mask = codes == -1
+            if mask.all():
+                # all codes are -1 (or there are no codes): nothing to take, so
+                # allocate; an empty result keeps the dtype of `values`
+                values = np.empty(len(mask), dtype=values.dtype)
+            else:
+                values = values.take(codes)
+            if mask.any():
+                # not ndarray.fill: NaN cannot be written into e.g. an int array
+                values, _ = maybe_upcast_putmask(values, mask, np.nan)
+        else:
+            values = values.take(codes, allow_fill=True)
+    return values
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -41,11 +41,11 @@
     infer_dtype_from_scalar,
     invalidate_string_dtypes,
     maybe_cast_to_datetime,
+    maybe_casted_values,
     maybe_convert_platform,
     maybe_downcast_to_dtype,
     maybe_infer_to_datetimelike,
     maybe_upcast,
-    maybe_upcast_putmask,
 )
 from pandas.core.dtypes.common import (
     ensure_float64,
@@ -83,7 +83,6 @@
 from pandas.core import algorithms, common as com, nanops, ops
 from pandas.core.accessor import CachedAccessor
 from pandas.core.arrays import Categorical, ExtensionArray
-from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
 from pandas.core.arrays.sparse import SparseFrameAccessor
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import (
@@ -93,9 +92,7 @@
     ensure_index_from_sequences,
 )
 from pandas.core.indexes import base as ibase
-from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.indexes.multi import maybe_droplevels
-from pandas.core.indexes.period import PeriodIndex
 from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
 from pandas.core.internals import BlockManager
 from pandas.core.internals.construction import (
@@ -4550,43 +4547,6 @@ class    max    type
         else:
             new_obj = self.copy()
 
-        def _maybe_casted_values(index, labels=None):
-            values = index._values
-            if not isinstance(index, (PeriodIndex, DatetimeIndex)):
-                if values.dtype == np.object_:
-                    values = lib.maybe_convert_objects(values)
-
-            # if we have the labels, extract the values with a mask
-            if labels is not None:
-                mask = labels == -1
-
-                # we can have situations where the whole mask is -1,
-                # meaning there is nothing found in labels, so make all nan's
-                if mask.all():
-                    values = np.empty(len(mask))
-                    values.fill(np.nan)
-                else:
-                    values = values.take(labels)
-
-                    # TODO(https://github.com/pandas-dev/pandas/issues/24206)
-                    # Push this into maybe_upcast_putmask?
-                    # We can't pass EAs there right now. Looks a bit
-                    # complicated.
-                    # So we unbox the ndarray_values, op, re-box.
-                    values_type = type(values)
-                    values_dtype = values.dtype
-
-                    if issubclass(values_type, DatetimeLikeArray):
-                        values = values._data
-
-                    if mask.any():
-                        values, changed = maybe_upcast_putmask(values, mask, np.nan)
-
-                    if issubclass(values_type, DatetimeLikeArray):
-                        values = values_type(values, dtype=values_dtype)
-
-            return values
-
         new_index = ibase.default_index(len(new_obj))
         if level is not None:
             if not isinstance(level, (tuple, list)):
@@ -4628,8 +4588,7 @@ def _maybe_casted_values(index, labels=None):
                     missing = self.columns.nlevels - len(name_lst)
                     name_lst += [col_fill] * missing
                     name = tuple(name_lst)
-                # to ndarray and maybe infer different dtype
-                level_values = _maybe_casted_values(lev, lab)
+                level_values = maybe_casted_values(lev, lab)
                 new_obj.insert(0, name, level_values)
 
         new_obj.index = new_index

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
@@ -12,6 +12,7 @@
 
 from pandas import (
     Categorical,
+    CategoricalIndex,
     DataFrame,
     DatetimeIndex,
     Index,
@@ -1157,34 +1158,43 @@ def test_reset_index_multiindex_col(self):
         )
         tm.assert_frame_equal(rs, xp)
 
-    def test_reset_index_multiindex_nan(self):
-        # GH6322, testing reset_index on MultiIndexes
-        # when we have a nan or all nan
-        df = DataFrame(
-            {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)}
-        )
-        rs = df.set_index(["A", "B"]).reset_index()
-        tm.assert_frame_equal(rs, df)
-
-        df = DataFrame(
-            {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)}
-        )
-        rs = df.set_index(["A", "B"]).reset_index()
-        tm.assert_frame_equal(rs, df)
-
-        df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]})
-        rs = df.set_index(["A", "B"]).reset_index()
-        tm.assert_frame_equal(rs, df)
+    @pytest.mark.parametrize(
+        "columns",
+        [
+            [["a", "b", "c"], [0, 1, np.nan], np.random.rand(3)],
+            [[np.nan, "b", "c"], [0, 1, 2], np.random.rand(3)],
+            [["a", "b", "c"], [0, 1, 2], [np.nan, 1.1, 2.2]],
+            [["a", "b", "c"], [np.nan, np.nan, np.nan], np.random.rand(3)],
+            [
+                DatetimeIndex([np.nan, np.nan]),
+                DatetimeIndex(["2015-01-01 11:00:00", np.nan]),
+                np.random.rand(2),
+            ],
+            [
+                CategoricalIndex(["A", "A", "B", "B"]),
+                CategoricalIndex(["a", "b", "a", np.nan]),
+                np.random.rand(4),
+            ],
+            [DatetimeIndex([]), [], []],
+        ],
+    )
+    def test_reset_index_multiindex_nan(self, columns):
+        # GH6322, GH19602, GH24206: set_index followed by reset_index must
+        # round-trip columns holding some NaNs, only NaNs, or no rows at all
+        names = ["A", "B", "C"]
+        expected = DataFrame(dict(zip(names, columns)), columns=names)
+        index_names = names[:2]
+        result = expected.set_index(index_names).reset_index()
+        tm.assert_frame_equal(expected, result)
 
-        df = DataFrame(
-            {
-                "A": ["a", "b", "c"],
-                "B": [np.nan, np.nan, np.nan],
-                "C": np.random.rand(3),
-            }
-        )
-        rs = df.set_index(["A", "B"]).reset_index()
-        tm.assert_frame_equal(rs, df)
+    def test_reset_index_multiindex_empty(self):
+        # GH19602: resetting the MultiIndex of an empty DataFrame must keep
+        # the integer dtype of each level (checked by position via .iloc)
+        idx = MultiIndex.from_product([[0, 1], [1, 2]])
+        empty_df = DataFrame(index=idx)[:0]
+        types = empty_df.reset_index().dtypes
+        assert types.iloc[0] == np.int64
+        assert types.iloc[1] == np.int64
 
     def test_reset_index_with_datetimeindex_cols(self):
         # GH5818