Skip to content

Commit 0e213e1

Browse files
BUG: fix reindexing MultiIndex with categorical datetime-like level (pandas-dev#21657)
(cherry picked from commit 1cc5471)
1 parent 278e4f7 commit 0e213e1

File tree

5 files changed

+58
-17
lines changed

5 files changed

+58
-17
lines changed

doc/source/whatsnew/v0.23.2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ Fixed Regressions
5555
- Fixed regression in :meth:`to_csv` where file-like objects were handled incorrectly (:issue:`21471`)
5656
- Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`).
5757
- Bug where both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`)
58+
- Fixed regression in :meth:`~DataFrame.reindex` and :meth:`~DataFrame.groupby`
59+
with a MultiIndex or multiple keys that contain categorical datetime-like values (:issue:`21390`).
5860
- Fixed regression in unary negative operations with object dtype (:issue:`21380`)
5961
- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`)
6062
- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes as space-delimited instead of tab-delimited text (:issue:`21104`)

pandas/core/indexes/multi.py

+12-14
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from pandas.compat.numpy import function as nv
1212
from pandas import compat
1313

14+
from pandas.core.dtypes.dtypes import (
15+
ExtensionDtype, PandasExtensionDtype)
1416
from pandas.core.dtypes.common import (
1517
_ensure_int64,
1618
_ensure_platform_int,
@@ -808,20 +810,16 @@ def values(self):
808810
return self._tuples
809811

810812
values = []
811-
for lev, lab in zip(self.levels, self.labels):
812-
# Need to box timestamps, etc.
813-
box = hasattr(lev, '_box_values')
814-
# Try to minimize boxing.
815-
if box and len(lev) > len(lab):
816-
taken = lev._box_values(algos.take_1d(lev._ndarray_values,
817-
lab))
818-
elif box:
819-
taken = algos.take_1d(lev._box_values(lev._ndarray_values),
820-
lab,
821-
fill_value=lev._na_value)
822-
else:
823-
taken = algos.take_1d(np.asarray(lev._values), lab)
824-
values.append(taken)
813+
814+
for i in range(self.nlevels):
815+
vals = self._get_level_values(i)
816+
if is_categorical_dtype(vals):
817+
vals = vals.get_values()
818+
if (isinstance(vals.dtype, (PandasExtensionDtype, ExtensionDtype))
819+
or hasattr(vals, '_box_values')):
820+
vals = vals.astype(object)
821+
vals = np.array(vals, copy=False)
822+
values.append(vals)
825823

826824
self._tuples = lib.fast_zip(values)
827825
return self._tuples

pandas/tests/frame/test_axis_select_reindex.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import numpy as np
1111

1212
from pandas.compat import lrange, lzip, u
13-
from pandas import (compat, DataFrame, Series, Index, MultiIndex,
13+
from pandas import (compat, DataFrame, Series, Index, MultiIndex, Categorical,
1414
date_range, isna)
1515
import pandas as pd
1616

@@ -1129,6 +1129,19 @@ def test_reindex_multi(self):
11291129

11301130
assert_frame_equal(result, expected)
11311131

1132+
def test_reindex_multi_categorical_time(self):
1133+
# https://github.com/pandas-dev/pandas/issues/21390
1134+
midx = pd.MultiIndex.from_product(
1135+
[Categorical(['a', 'b', 'c']),
1136+
Categorical(date_range("2012-01-01", periods=3, freq='H'))])
1137+
df = pd.DataFrame({'a': range(len(midx))}, index=midx)
1138+
df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]]
1139+
1140+
result = df2.reindex(midx)
1141+
expected = pd.DataFrame(
1142+
{'a': [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
1143+
assert_frame_equal(result, expected)
1144+
11321145
data = [[1, 2, 3], [1, 2, 3]]
11331146

11341147
@pytest.mark.parametrize('actual', [

pandas/tests/groupby/test_categorical.py

+20
Original file line numberDiff line numberDiff line change
@@ -850,3 +850,23 @@ def test_empty_prod():
850850
result = df.groupby("A", observed=False).B.prod(min_count=1)
851851
expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
852852
tm.assert_series_equal(result, expected)
853+
854+
855+
def test_groupby_multiindex_categorical_datetime():
856+
# https://github.com/pandas-dev/pandas/issues/21390
857+
858+
df = pd.DataFrame({
859+
'key1': pd.Categorical(list('abcbabcba')),
860+
'key2': pd.Categorical(
861+
list(pd.date_range('2018-06-01 00', freq='1T', periods=3)) * 3),
862+
'values': np.arange(9),
863+
})
864+
result = df.groupby(['key1', 'key2']).mean()
865+
866+
idx = pd.MultiIndex.from_product(
867+
[pd.Categorical(['a', 'b', 'c']),
868+
pd.Categorical(pd.date_range('2018-06-01 00', freq='1T', periods=3))],
869+
names=['key1', 'key2'])
870+
expected = pd.DataFrame(
871+
{'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx)
872+
assert_frame_equal(result, expected)

pandas/tests/indexes/test_multi.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212

1313
import pandas as pd
1414

15-
from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex,
16-
compat, date_range, period_range)
15+
from pandas import (CategoricalIndex, Categorical, DataFrame, Index,
16+
MultiIndex, compat, date_range, period_range)
1717
from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY
1818
from pandas.errors import PerformanceWarning, UnsortedIndexError
1919
from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -1595,6 +1595,14 @@ def test_get_indexer_nearest(self):
15951595
with pytest.raises(NotImplementedError):
15961596
midx.get_indexer(['a'], method='pad', tolerance=2)
15971597

1598+
def test_get_indexer_categorical_time(self):
1599+
# https://github.com/pandas-dev/pandas/issues/21390
1600+
midx = MultiIndex.from_product(
1601+
[Categorical(['a', 'b', 'c']),
1602+
Categorical(date_range("2012-01-01", periods=3, freq='H'))])
1603+
result = midx.get_indexer(midx)
1604+
tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
1605+
15981606
def test_hash_collisions(self):
15991607
# non-smoke test that we don't get hash collisions
16001608

0 commit comments

Comments
 (0)