diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c07760a94d3f1..c9c22de9141fe 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1629,6 +1629,7 @@ Indexing - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) - Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) - Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`) + - Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 257ca86947f2b..5f38b19742f71 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -12,6 +12,7 @@ is_scalar) from pandas.core.common import _asarray_tuplesafe from pandas.core.dtypes.missing import array_equivalent +from pandas.core.algorithms import take_1d from pandas.util.decorators import Appender, cache_readonly @@ -470,8 +471,10 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): codes = target.codes else: if isinstance(target, CategoricalIndex): - target = target.categories - codes = self.categories.get_indexer(target) + code_indexer = self.categories.get_indexer(target.categories) + codes = take_1d(code_indexer, target.codes, fill_value=-1) + else: + codes = self.categories.get_indexer(target) indexer, _ = self._engine.get_indexer_non_unique(codes) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index b877a9d181848..cc71cf6b1a4dc 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1928,6 +1928,27 @@ def 
test_concat_multiindex_dfs_with_deepcopy(self): result_no_copy = pd.concat(example_dict, names=['testname']) tm.assert_frame_equal(result_no_copy, expected) + def test_concat_categoricalindex(self): + # GH 16111, categories that aren't lexsorted + categories = [9, 0, 1, 2, 3] + + a = pd.Series(1, index=pd.CategoricalIndex([9, 0], + categories=categories)) + b = pd.Series(2, index=pd.CategoricalIndex([0, 1], + categories=categories)) + c = pd.Series(3, index=pd.CategoricalIndex([1, 2], + categories=categories)) + + result = pd.concat([a, b, c], axis=1) + + exp_idx = pd.CategoricalIndex([0, 1, 2, 9]) + exp = pd.DataFrame({0: [1, np.nan, np.nan, 1], + 1: [2, 2, np.nan, np.nan], + 2: [np.nan, 3, 3, np.nan]}, + columns=[0, 1, 2], + index=exp_idx) + tm.assert_frame_equal(result, exp) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float'])