pandas-dev · jorisvandenbossche · Jul 2, 2018 · Jun 27, 2018 · Jun 27, 2018 · Jun 27, 2018
diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
@@ -55,6 +55,8 @@ Fixed Regressions
 - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
 - Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`).
 - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`)
+- Fixed regression in :meth:`~DataFrame.reindex` and :meth:`~DataFrame.groupby`
+  with a MultiIndex or multiple keys that contains categorical datetime-like values (:issue:`21390`).
 - Fixed regression in unary negative operations with object dtype (:issue:`21380`)
 - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`)
 

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -11,6 +11,8 @@
 from pandas.compat.numpy import function as nv
 from pandas import compat
 
+from pandas.core.dtypes.dtypes import (
+    ExtensionDtype, PandasExtensionDtype)
 from pandas.core.dtypes.common import (
     _ensure_int64,
     _ensure_platform_int,
@@ -807,20 +809,16 @@ def values(self):
             return self._tuples
 
         values = []
-        for lev, lab in zip(self.levels, self.labels):
-            # Need to box timestamps, etc.
-            box = hasattr(lev, '_box_values')
-            # Try to minimize boxing.
-            if box and len(lev) > len(lab):
-                taken = lev._box_values(algos.take_1d(lev._ndarray_values,
-                                                      lab))
-            elif box:
-                taken = algos.take_1d(lev._box_values(lev._ndarray_values),
-                                      lab,
-                                      fill_value=lev._na_value)
-            else:
-                taken = algos.take_1d(np.asarray(lev._values), lab)
-            values.append(taken)
+
+        for i in range(self.nlevels):
+            vals = self._get_level_values(i)
+            if is_categorical_dtype(vals):
+                vals = vals.get_values()
+            if (isinstance(vals.dtype, (PandasExtensionDtype, ExtensionDtype))
+                    or hasattr(vals, '_box_values')):
+                vals = vals.astype(object)
+            vals = np.array(vals, copy=False)
+            values.append(vals)
 
         self._tuples = lib.fast_zip(values)
         return self._tuples

diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py
@@ -10,7 +10,7 @@
 import numpy as np
 
 from pandas.compat import lrange, lzip, u
-from pandas import (compat, DataFrame, Series, Index, MultiIndex,
+from pandas import (compat, DataFrame, Series, Index, MultiIndex, Categorical,
                     date_range, isna)
 import pandas as pd
 
@@ -1129,6 +1129,19 @@ def test_reindex_multi(self):
 
         assert_frame_equal(result, expected)
 
+    def test_reindex_multi_categorical_time(self):
+        # https://github.com/pandas-dev/pandas/issues/21390
+        midx = pd.MultiIndex.from_product(
+            [Categorical(['a', 'b', 'c']),
+             Categorical(date_range("2012-01-01", periods=3, freq='H'))])
+        df = pd.DataFrame({'a': range(len(midx))}, index=midx)
+        df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]]
+
+        result = df2.reindex(midx)
+        expected = pd.DataFrame(
+            {'a': [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
+        assert_frame_equal(result, expected)
+
     data = [[1, 2, 3], [1, 2, 3]]
 
     @pytest.mark.parametrize('actual', [

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -850,3 +850,23 @@ def test_empty_prod():
     result = df.groupby("A", observed=False).B.prod(min_count=1)
     expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
     tm.assert_series_equal(result, expected)
+
+
+def test_groupby_multiindex_categorical_datetime():
+    # https://github.com/pandas-dev/pandas/issues/21390
+
+    df = pd.DataFrame({
+        'key1': pd.Categorical(list('abcbabcba')),
+        'key2': pd.Categorical(
+            list(pd.date_range('2018-06-01 00', freq='1T', periods=3)) * 3),
+        'values': np.arange(9),
+    })
+    result = df.groupby(['key1', 'key2']).mean()
+
+    idx = pd.MultiIndex.from_product(
+        [pd.Categorical(['a', 'b', 'c']),
+         pd.Categorical(pd.date_range('2018-06-01 00', freq='1T', periods=3))],
+        names=['key1', 'key2'])
+    expected = pd.DataFrame(
+        {'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx)
+    assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
@@ -12,8 +12,8 @@
 
 import pandas as pd
 
-from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex,
-                    compat, date_range, period_range)
+from pandas import (CategoricalIndex, Categorical, DataFrame, Index,
+                    MultiIndex, compat, date_range, period_range)
 from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY
 from pandas.errors import PerformanceWarning, UnsortedIndexError
 from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -1596,6 +1596,14 @@ def test_get_indexer_nearest(self):
         with pytest.raises(NotImplementedError):
             midx.get_indexer(['a'], method='pad', tolerance=2)
 
+    def test_get_indexer_categorical_time(self):
+        # https://github.com/pandas-dev/pandas/issues/21390
+        midx = MultiIndex.from_product(
+            [Categorical(['a', 'b', 'c']),
+             Categorical(date_range("2012-01-01", periods=3, freq='H'))])
+        result = midx.get_indexer(midx)
+        tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
+
     def test_hash_collisions(self):
         # non-smoke test that we don't get hash collisions