diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a1a2149da7cf6..5a95883e12fd1 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -407,7 +407,6 @@ upon serialization. (Related issue :issue:`12997`) # Roundtripping now works pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index - .. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical: DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True`` @@ -888,8 +887,9 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Bug in :meth:`Categorical.view` not accepting integer dtypes (:issue:`25464`) -- Bug in :meth:`CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`) +- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`) +- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`) +- Bug in :meth:`DataFrame.concat` when concatenating two (or more) unordered ``CategoricalIndex`` variables, whose categories are permutations, yields incorrect index values (:issue:`24845`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e068c1434fd4e..d1bdedee5caa0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -573,7 +573,9 @@ def map(self, mapper): def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # if calling index is category, don't check dtype of others try: - codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) + cat = Categorical._concat_same_type( + [self._is_dtype_compat(c) for c in to_concat] + ) except TypeError: # not all to_concat elements are among our categories (or NA) from pandas.core.dtypes.concat import concat_compat @@ -581,5 +583,4 @@ def _concat(self, to_concat: list[Index], name: Hashable) -> Index: res = concat_compat([x._values for x in to_concat]) return Index(res, name=name) else: - cat = self._data._from_backing_data(codes) return type(self)._simple_new(cat, name=name) diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index 5bafd2e8e8503..f00d3b369a94d 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -238,3 +238,19 @@ def test_categorical_missing_from_one_frame(self): index=[0, 1, 2, 0, 1, 2], ) tm.assert_frame_equal(result, expected) + + def test_concat_categorical_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/24845 + + c1 = pd.CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False) + c2 = pd.CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False) + c3 = pd.CategoricalIndex( + ["a", "a", "b", "b"], categories=["a", "b"], ordered=False + ) + + df1 = DataFrame({"A": [1, 2]}, index=c1) + df2 = DataFrame({"A": [3, 4]}, index=c2) + + result = pd.concat((df1, df2)) + expected = DataFrame({"A": [1, 2, 3, 4]}, index=c3) + tm.assert_frame_equal(result, expected)