Skip to content

Commit e4f2ef2

Browse files
GivyBoy authored and noatamir committed
BUG: fix bug where appending unordered CategoricalIndex variables overrides index (pandas-dev#47841)
BUG: fix bug where appending unordered ``CategoricalIndex`` variables overrides index (pandas-dev#24845)
1 parent f3e921d commit e4f2ef2

File tree

3 files changed

+22
-5
lines changed

3 files changed

+22
-5
lines changed

doc/source/whatsnew/v1.5.0.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,6 @@ upon serialization. (Related issue :issue:`12997`)
407407
# Roundtripping now works
408408
pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
409409
410-
411410
.. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical:
412411

413412
DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True``
@@ -888,8 +887,9 @@ Bug fixes
888887

889888
Categorical
890889
^^^^^^^^^^^
891-
- Bug in :meth:`Categorical.view` not accepting integer dtypes (:issue:`25464`)
892-
- Bug in :meth:`CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
890+
- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`)
891+
- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
892+
- Bug in :func:`concat` when concatenating two (or more) unordered ``CategoricalIndex`` variables, whose categories are permutations, yielding incorrect index values (:issue:`24845`)
893893

894894
Datetimelike
895895
^^^^^^^^^^^^

pandas/core/indexes/category.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -573,13 +573,14 @@ def map(self, mapper):
573573
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
574574
# if calling index is category, don't check dtype of others
575575
try:
576-
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
576+
cat = Categorical._concat_same_type(
577+
[self._is_dtype_compat(c) for c in to_concat]
578+
)
577579
except TypeError:
578580
# not all to_concat elements are among our categories (or NA)
579581
from pandas.core.dtypes.concat import concat_compat
580582

581583
res = concat_compat([x._values for x in to_concat])
582584
return Index(res, name=name)
583585
else:
584-
cat = self._data._from_backing_data(codes)
585586
return type(self)._simple_new(cat, name=name)

pandas/tests/reshape/concat/test_categorical.py

+16
Original file line numberDiff line numberDiff line change
@@ -238,3 +238,19 @@ def test_categorical_missing_from_one_frame(self):
238238
index=[0, 1, 2, 0, 1, 2],
239239
)
240240
tm.assert_frame_equal(result, expected)
241+
242+
def test_concat_categorical_same_categories_different_order(self):
243+
# https://github.com/pandas-dev/pandas/issues/24845
244+
245+
c1 = pd.CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False)
246+
c2 = pd.CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False)
247+
c3 = pd.CategoricalIndex(
248+
["a", "a", "b", "b"], categories=["a", "b"], ordered=False
249+
)
250+
251+
df1 = DataFrame({"A": [1, 2]}, index=c1)
252+
df2 = DataFrame({"A": [3, 4]}, index=c2)
253+
254+
result = pd.concat((df1, df2))
255+
expected = DataFrame({"A": [1, 2, 3, 4]}, index=c3)
256+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)