Skip to content

Commit 12367c0

Browse files
committed
BUG: fix bug where appending unordered CategoricalIndex variables overrides index (#24845)
1 parent a62897a commit 12367c0

File tree

3 files changed

+65
-3
lines changed

3 files changed

+65
-3
lines changed

doc/source/whatsnew/v1.5.0.rst

+42-1
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,47 @@ upon serialization. (Related issue :issue:`12997`)
390390
# Roundtripping now works
391391
pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
392392
393+
.. _whatsnew_150.notable_bug_fixes.groupby_value_counts_categorical:
394+
395+
DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True``
396+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
397+
398+
Previously, calling :meth:`.DataFrameGroupBy.value_counts` with ``observed=True`` would incorrectly drop non-observed categories of non-grouping columns; these categories are now retained with a count of zero (:issue:`46357`).
399+
400+
.. code-block:: ipython
401+
402+
In [6]: df = pd.DataFrame(["a", "b", "c"], dtype="category").iloc[0:2]
403+
In [7]: df
404+
Out[7]:
405+
0
406+
0 a
407+
1 b
408+
409+
*Old Behavior*
410+
411+
.. code-block:: ipython
412+
413+
In [8]: df.groupby(level=0, observed=True).value_counts()
414+
Out[8]:
415+
0 a 1
416+
1 b 1
417+
dtype: int64
418+
419+
420+
*New Behavior*
421+
422+
.. code-block:: ipython
423+
424+
In [9]: df.groupby(level=0, observed=True).value_counts()
425+
Out[9]:
426+
0 a 1
427+
1 a 0
428+
b 1
429+
0 b 0
430+
c 0
431+
1 c 0
432+
dtype: int64
433+
393434
.. ---------------------------------------------------------------------------
394435
.. _whatsnew_150.api_breaking:
395436

@@ -814,7 +855,7 @@ Categorical
814855
^^^^^^^^^^^
815856
- Bug in :meth:`.Categorical.view` not accepting integer dtypes (:issue:`25464`)
816857
- Bug in :meth:`.CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
817-
-
858+
- Bug in :func:`concat` when concatenating two (or more) unordered ``CategoricalIndex`` objects whose categories are permutations of each other yielding incorrect index values (:issue:`24845`)
818859

819860
Datetimelike
820861
^^^^^^^^^^^^

pandas/core/indexes/category.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -571,14 +571,17 @@ def map(self, mapper):
571571

572572
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
573573
# if calling index is category, don't check dtype of others
574+
574575
try:
575-
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
576+
cat = Categorical._concat_same_type(
577+
[self._is_dtype_compat(c) for c in to_concat]
578+
)
576579
except TypeError:
577580
# not all to_concat elements are among our categories (or NA)
578581
from pandas.core.dtypes.concat import concat_compat
579582

580583
res = concat_compat([x._values for x in to_concat])
584+
581585
return Index(res, name=name)
582586
else:
583-
cat = self._data._from_backing_data(codes)
584587
return type(self)._simple_new(cat, name=name)

pandas/tests/reshape/concat/test_categorical.py

+18
Original file line numberDiff line numberDiff line change
@@ -238,3 +238,21 @@ def test_categorical_missing_from_one_frame(self):
238238
index=[0, 1, 2, 0, 1, 2],
239239
)
240240
tm.assert_frame_equal(result, expected)
241+
242+
def test_concat_categorical_same_categories_different_order(self):
243+
# https://github.com/pandas-dev/pandas/issues/24845
244+
245+
c1 = pd.CategoricalIndex(["a", "a"], categories=["a", "b"], ordered=False)
246+
c2 = pd.CategoricalIndex(["b", "b"], categories=["b", "a"], ordered=False)
247+
c3 = pd.CategoricalIndex(
248+
["a", "a", "b", "b"], categories=["a", "b"], ordered=False
249+
)
250+
251+
df1 = DataFrame({"A": [1, 2]}, index=c1)
252+
df2 = DataFrame({"A": [3, 4]}, index=c2)
253+
254+
result = pd.concat((df1, df2))
255+
256+
expected = DataFrame({"A": [1, 2, 3, 4]}, index=c3)
257+
258+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)