Skip to content

Commit 2202283

Browse files
committed
BUG: fix bug where appending unordered CategoricalIndex variables overrides index (pandas-dev#24845)
1 parent a62897a commit 2202283

File tree

3 files changed

+24
-3
lines changed

3 files changed

+24
-3
lines changed

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,7 @@ Categorical
814814
^^^^^^^^^^^
815815
- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`)
816816
- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
817-
-
817+
- Bug in :func:`concat` when concatenating two (or more) unordered ``CategoricalIndex`` objects whose categories are permutations of each other yielding incorrect index values (:issue:`24845`)
818818

819819
Datetimelike
820820
^^^^^^^^^^^^

pandas/core/indexes/category.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -571,14 +571,17 @@ def map(self, mapper):
571571

572572
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
573573
# if calling index is category, don't check dtype of others
574+
574575
try:
575-
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
576+
cat = Categorical._concat_same_type(
577+
[self._is_dtype_compat(c) for c in to_concat]
578+
)
576579
except TypeError:
577580
# not all to_concat elements are among our categories (or NA)
578581
from pandas.core.dtypes.concat import concat_compat
579582

580583
res = concat_compat([x._values for x in to_concat])
584+
581585
return Index(res, name=name)
582586
else:
583-
cat = self._data._from_backing_data(codes)
584587
return type(self)._simple_new(cat, name=name)

pandas/tests/reshape/concat/test_categorical.py

+18
Original file line numberDiff line numberDiff line change
@@ -238,3 +238,21 @@ def test_categorical_missing_from_one_frame(self):
238238
index=[0, 1, 2, 0, 1, 2],
239239
)
240240
tm.assert_frame_equal(result, expected)
241+
242+
def test_concat_categorical_same_categories_different_order(self):
243+
# https://github.com/pandas-dev/pandas/issues/24845
244+
245+
c1 = pd.CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False)
246+
c2 = pd.CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False)
247+
c3 = pd.CategoricalIndex(
248+
["a", "a", "b", "b"], categories=["a", "b"], ordered=False
249+
)
250+
251+
df1 = DataFrame({"A": [1, 2]}, index=c1)
252+
df2 = DataFrame({"A": [3, 4]}, index=c2)
253+
254+
result = pd.concat((df1, df2))
255+
256+
expected = DataFrame({"A": [1, 2, 3, 4]}, index=c3)
257+
258+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)